注册 登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

gmd20的个人空间

// 编程和生活

 
 
 

日志

 
 

multipath-tool 源码阅读  

2011-03-15 19:04:01|  分类: linux相关 |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |

multipath userspace config tool 的实现机制在 http://christophe.varoqui.free.fr/refbook.html 里说得比较清楚。我想关键部分就是磁盘映射,以及检测 path 是否有效的机制。

映射匹配很简单:根据磁盘的序列号(“name by LU WWID”)即可,经由不同 path 看到的同一个磁盘,应该就是靠序列号认出是同一个的。磁盘 path 的选择则是靠一个后台守护进程不停地检测。

=============================

the multipathd daemon


This daemon can do everything the multipath command does and, additionally, is in charge of checking the paths in case they come up or go down. When this occurs, it reconfigures the multipath map the path belongs to, so that this map regains its maximum performance and redundancy.

The implementation requirements fulfilled by this daemon are:

  • Ensure naming stability of the multipathed LU (in complement of udev)
  • Configure the multipaths to maximize availability and performance
  • Ensure that failed paths get revalidated as soon as possible
  • Reconfigure the multipaths automatically when events occur (add/remove paths, switch path groups, ...)

============================================

代码在  http://git.kernel.org/gitweb.cgi?p=linux%2Fstorage%2Fmultipath-tools%2F.git

这个创建守护进程的函数不错!需要的时候抄一下,^_^

/*
 * daemonize - detach the calling process into a background daemon.
 *
 * Forks, starts a new session with setsid(), forks a second time, then
 * redirects stdin to /dev/null and stdout/stderr to /dev/console and
 * changes the working directory to "/".
 *
 * Returns -1 if the first fork fails, the child's pid (> 0) in the
 * original parent, and 0 in the process that carries on as the daemon.
 */
1525 static int
1526 daemonize(void)
1527 {
1528         int pid;
1529         int in_fd, out_fd;
1530
1531         if( (pid = fork()) < 0){
1532                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
1533                 return -1;
1534         }
1535         else if (pid != 0)
             /* original parent: hand the child's pid back to the caller */
1536                 return pid;
1537
             /* first child: become leader of a new session */
1538         setsid();
1539
             /*
              * NOTE(review): a failed second fork is only logged; execution
              * falls through and this process continues as the daemon.
              */
1540         if ( (pid = fork()) < 0)
1541                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
1542         else if (pid != 0)
             /* intermediate process exits; only the second child goes on */
1543                 _exit(0);
1544
             /*
              * NOTE(review): both open() failure paths below terminate with
              * status 0 even though they are error paths.
              */
1545         in_fd = open("/dev/null", O_RDONLY);
1546         if (in_fd < 0){
1547                 fprintf(stderr, "cannot open /dev/null for input : %s\n",
1548                         strerror(errno));
1549                 _exit(0);
1550         }
1551         out_fd = open("/dev/console", O_WRONLY);
1552         if (out_fd < 0){
1553                 fprintf(stderr, "cannot open /dev/console for output : %s\n",
1554                         strerror(errno));
1555                 _exit(0);
1556         }
1557
             /*
              * Re-point the three standard descriptors. Each close()/dup()
              * pair relies on dup() handing back the lowest-numbered free
              * descriptor, i.e. the one just closed.
              * NOTE(review): the dup() return values are not checked.
              */
1558         close(STDIN_FILENO);
1559         dup(in_fd);
1560         close(STDOUT_FILENO);
1561         dup(out_fd);
1562         close(STDERR_FILENO);
1563         dup(out_fd);
1564
             /* the temporary descriptors are no longer needed */
1565         close(in_fd);
1566         close(out_fd);
             /* a chdir() failure is reported but not treated as fatal */
1567         if (chdir("/") < 0)
1568                 fprintf(stderr, "cannot chdir to '/', continuing\n");
1569
1570         return 0;
1571 }




/*
 * child - main body of the daemon process.
 *
 * Loads DEFAULT_CONFIGFILE, initialises checkers and prioritizers, applies
 * config defaults and the fd limit, writes the pid file, builds the
 * path/multipath vectors via configure(), then starts three threads
 * (checkerloop, ueventloop, uxlsnrloop) and blocks on exit_cond.
 * Once woken it tears everything down and calls exit(0).
 *
 * param is not used in the visible body. Every path through the function
 * ends in exit(), so despite the int return type no value is returned.
 */
1371 static int
1372 child (void * param)
1373 {
1374         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1375         pthread_attr_t log_attr, misc_attr;
1376         struct vectors * vecs;
1377         struct multipath * mpp;
1378         int i;
1379
             /* lock current and future pages of the process into memory */
1380         mlockall(MCL_CURRENT | MCL_FUTURE);
1381
             /*
              * The 2nd argument looks like a stack size in bytes and the 3rd
              * like a detached flag -- TODO confirm against
              * setup_thread_attr(). waiter_attr is not declared in this
              * function; presumably a file-scope variable.
              */
1382         setup_thread_attr(&misc_attr, 64 * 1024, 1);
1383         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
1384
1385         if (logsink) {
1386                 setup_thread_attr(&log_attr, 64 * 1024, 0);
1387                 log_thread_start(&log_attr);
1388                 pthread_attr_destroy(&log_attr);
1389         }
1390
1391         condlog(2, "--------start up--------");
1392         condlog(2, "read " DEFAULT_CONFIGFILE);
1393
1394         if (load_config(DEFAULT_CONFIGFILE))
1395                 exit(1);
1396
1397         if (init_checkers()) {
1398                 condlog(0, "failed to initialize checkers");
1399                 exit(1);
1400         }
1401         if (init_prio()) {
1402                 condlog(0, "failed to initialize prioritizers");
1403                 exit(1);
1404         }
1405
1406         setlogmask(LOG_UPTO(conf->verbosity + 3));
1407
1408         /*
1409          * fill the voids left in the config file
1410          */
1411         if (!conf->checkint) {
1412                 conf->checkint = DEFAULT_CHECKINT;
1413                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1414         }
1415
             /* a failure to raise the fd limit is logged but not fatal */
1416         if (conf->max_fds) {
1417                 struct rlimit fd_limit;
1418
1419                 fd_limit.rlim_cur = conf->max_fds;
1420                 fd_limit.rlim_max = conf->max_fds;
1421                 if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
1422                         condlog(0, "can't set open fds limit to %d : %s\n",
1423                                 conf->max_fds, strerror(errno));
1424         }
1425
1426         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1427                 if (logsink)
1428                         log_thread_stop();
1429
1430                 exit(1);
1431         }
1432         signal_init();
1433         setscheduler();
1434         set_oom_adj(-16);
             /* the same vectors object is published through the global gvecs */
1435         vecs = gvecs = init_vecs();
1436
1437         if (!vecs)
1438                 exit(1);
1439
1440         if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
1441                 condlog(0, "can not find sysfs mount point");
1442                 exit(1);
1443         }
1444         conf->daemon = 1;
1445         dm_udev_set_sync_support(0);
1446         /*
1447          * fetch and configure both paths and multipaths
1448          */
1449         if (configure(vecs, 1)) {
1450                 condlog(0, "failure during configuration");
1451                 exit(1);
1452         }
1453         /*
1454          * start threads
1455          */
             /* NOTE(review): the pthread_create() return values are not checked */
1456         pthread_create(&check_thr, &misc_attr, checkerloop, vecs);
1457         pthread_create(&uevent_thr, &misc_attr, ueventloop, vecs);
1458         pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
1459         pthread_attr_destroy(&misc_attr);
1460
             /*
              * Park here until exit_cond is signalled.
              * NOTE(review): the wait is not wrapped in a predicate loop, so
              * a spurious wakeup would start the shutdown sequence; also,
              * exit_mutex is never unlocked in the code visible below.
              */
1461         pthread_mutex_lock(&exit_mutex);
1462         pthread_cond_wait(&exit_cond, &exit_mutex);
1463
1464         /*
1465          * exit path
1466          */
1467         block_signal(SIGHUP, NULL);
1468         lock(vecs->lock);
             /* the unbraced if governs the whole vector_foreach_slot statement */
1469         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
1470                 vector_foreach_slot(vecs->mpvec, mpp, i)
1471                         dm_queue_if_no_path(mpp->alias, 0);
1472         remove_maps_and_stop_waiters(vecs);
1473         free_pathvec(vecs->pathvec, FREE_PATHS);
1474
1475         pthread_cancel(check_thr);
1476         pthread_cancel(uevent_thr);
1477         pthread_cancel(uxlsnr_thr);
1478
1479         sysfs_cleanup();
1480
1481         free_keys(keys);
1482         keys = NULL;
1483         free_handlers(handlers);
1484         handlers = NULL;
1485         free_polls();
1486
1487         unlock(vecs->lock);
1488         /* Now all the waitevent threads will start rushing in. */
             /* polls lock.depth once per second until it drops to zero */
1489         while (vecs->lock.depth > 0) {
1490                 sleep (1); /* This is weak. */
1491                 condlog(3,"Have %d wait event checkers threads to de-alloc, waiting..\n", vecs->lock.depth);
1492         }
1493         pthread_mutex_destroy(vecs->lock.mutex);
1494         FREE(vecs->lock.mutex);
1495         vecs->lock.depth = 0;
1496         vecs->lock.mutex = NULL;
1497         FREE(vecs);
1498         vecs = NULL;
1499
1500         condlog(2, "--------shut down-------");
1501
1502         if (logsink)
1503                 log_thread_stop();
1504
1505         dm_lib_release();
1506         dm_lib_exit();
1507
1508         cleanup_prio();
1509         cleanup_checkers();
1510         /*
1511          * Freeing config must be done after condlog() and dm_lib_exit(),
1512          * because logging functions like dlog() and dm_write_log()
1513          * reference the config.
1514          */
1515         free_config(conf);
1516         conf = NULL;
1517
1518 #ifdef _DEBUG_
1519         dbg_free_final(NULL);
1520 #endif
1521
1522         exit(0);
1523 }



/*
 * checkerloop - thread body that walks all paths once per second.
 *
 * ap is the struct vectors pointer handed over at thread creation.
 * Each pass blocks SIGHUP, takes vecs->lock, calls check_path() on every
 * entry of vecs->pathvec, ticks the deferred-failback and retry counters of
 * the maps, runs the map garbage collector whenever the local countdown is
 * zero (first pass, then every MAPGCINT + 1 passes), drops the lock,
 * restores the saved signal mask and sleeps for one second.
 *
 * The while (1) loop has no exit in the visible code, so the trailing
 * return is not reached from here.
 */
1079 static void *
1080 checkerloop (void *ap)
1081 {
1082         struct vectors *vecs;
1083         struct path *pp;
1084         int count = 0;
1085         unsigned int i;
1086         sigset_t old;
1087
1088         mlockall(MCL_CURRENT | MCL_FUTURE);
1089         vecs = (struct vectors *)ap;
1090         condlog(2, "path checkers start up");
1091
1092         /*
1093          * init the path check interval
1094          */
1095         vector_foreach_slot (vecs->pathvec, pp, i) {
1096                 pp->checkint = conf->checkint;
1097         }
1098
1099         while (1) {
                     /* 'old' is restored with pthread_sigmask() at the end of the pass */
1100                 block_signal(SIGHUP, &old);
                     /*
                      * Registers cleanup_lock so the lock is presumably released
                      * if this thread is cancelled while holding it; paired with
                      * lock_cleanup_pop() below -- TODO confirm in the lock header.
                      */
1101                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1102                 lock(vecs->lock);
1103                 condlog(4, "tick");
1104
1105                 if (vecs->pathvec) {    // every 1-second tick visits each path; check_path() itself decides whether that path is due
1106                         vector_foreach_slot (vecs->pathvec, pp, i) {
1107                                 check_path(vecs, pp);      //// check the state of this path
1108                         }
1109                 }
1110                 if (vecs->mpvec) {
1111                         defered_failback_tick(vecs->mpvec);
1112                         retry_count_tick(vecs->mpvec);
1113                 }
                     /* countdown to the next map garbage collection */
1114                 if (count)
1115                         count--;
1116                 else {
1117                         condlog(4, "map garbage collection");
1118                         mpvec_garbage_collector(vecs);
1119                         count = MAPGCINT;
1120                 }
1121
1122                 lock_cleanup_pop(vecs->lock);
1123                 pthread_sigmask(SIG_SETMASK, &old, NULL);
1124                 sleep(1);
1125         }
1126         return NULL;
1127 }









/*
 * check_path - re-evaluate one path and react to a state change.
 *
 * vecs: global vectors; vecs->pathvec is passed to
 *       update_multipath_strings() and vecs to update_path_groups().
 * pp:   the path to check; ignored when it is not attached to a map
 *       (pp->mpp is NULL) or when its tick countdown has not expired.
 *
 * Obtains the new state from get_state(). On a state change it either
 * fails the path (PATH_DOWN / PATH_SHAKY, or a non-zero return from
 * update_multipath_strings()) or reinstates it and handles failback.
 * When the state is unchanged and good (PATH_UP / PATH_GHOST) the check
 * interval is doubled up to conf->max_checkint. Finally the path priority
 * is refreshed and the path groups are updated or switched if needed.
 */
945 void
946 check_path (struct vectors * vecs, struct path * pp)
947 {
948         int newstate;
949         int oldpriority;
950
951         if (!pp->mpp)
952                 return;
953
            /* count the tick down; proceed only when it is, or reaches, zero */
954         if (pp->tick && --pp->tick)
955                 return; /* don't check this path yet */
956
957         /*
958          * provision a next check soonest,
959          * in case we exit abnormaly from here
960          */
961         pp->tick = conf->checkint;
962
963         newstate = get_state(pp, 1);   ////// the path state is probed here
964
            /* no usable result: refresh the path info (flags 0) and give up for now */
965         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
966                 condlog(2, "%s: unusable path", pp->dev);
967                 pathinfo(pp, conf->hwtable, 0);
968                 return;
969         }
970         /*
971          * Async IO in flight. Keep the previous path state
972          * and reschedule as soon as possible
973          */
974         if (newstate == PATH_PENDING) {
975                 pp->tick = 1;
976                 return;
977         }
978         if (newstate != pp->state) {
979                 int oldstate = pp->state;
980                 pp->state = newstate;
981                 LOG_MSG(1, checker_message(&pp->checker));
982
983                 /*
984                  * upon state change, reset the checkint
985                  * to the shortest delay
986                  */
987                 pp->checkint = conf->checkint;
988
                    /*
                     * update_multipath_strings() is only called when the new
                     * state is neither DOWN nor SHAKY (short-circuit ||); a
                     * non-zero return from it also takes the fail branch.
                     */
989                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY ||
990                     update_multipath_strings(pp->mpp, vecs->pathvec)) {
991                         /*
992                          * proactively fail path in the DM
993                          */
994                         if (oldstate == PATH_UP ||
995                             oldstate == PATH_GHOST)
996                                 fail_path(pp, 1);         ////// presumably updates the map according to the path state
997                         else
998                                 fail_path(pp, 0);  ////// presumably updates the map according to the path state
999
1000                         /*
1001                          * cancel scheduled failback
1002                          */
1003                         pp->mpp->failback_tick = 0;
1004
1005                         pp->mpp->stat_path_failures++;
1006                         return;
1007                 }
1008
1009                 /*
1010                  * reinstate this path
1011                  */
1012                 if (oldstate != PATH_UP &&
1013                     oldstate != PATH_GHOST)
1014                         reinstate_path(pp, 1);
1015                 else
1016                         reinstate_path(pp, 0);
1017
1018                 /*
1019                  * schedule [defered] failback
1020                  */
1021                 if (pp->mpp->pgfailback > 0)
1022                         pp->mpp->failback_tick =
1023                                 pp->mpp->pgfailback + 1;
1024                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
1025                     need_switch_pathgroup(pp->mpp, 1))
1026                         switch_pathgroup(pp->mpp);
1027
1028                 /*
1029                  * if at least one path is up in a group, and
1030                  * the group is disabled, re-enable it
1031                  */
1032                 if (newstate == PATH_UP)
1033                         enable_group(pp);    ////// presumably updates the map according to the path state
1034         }
1035         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1036                 LOG_MSG(4, checker_message(&pp->checker));
1037                 /*
1038                  * double the next check delay.
1039                  * max at conf->max_checkint
1040                  */
1041                 if (pp->checkint < (conf->max_checkint / 2))
1042                         pp->checkint = 2 * pp->checkint;
1043                 else
1044                         pp->checkint = conf->max_checkint;
1045
1046                 pp->tick = pp->checkint;
1047                 condlog(4, "%s: delay next check %is",
1048                                 pp->dev_t, pp->tick);
1049         }
1050         else if (newstate == PATH_DOWN)
1051                 LOG_MSG(2, checker_message(&pp->checker));
1052
             /* already assigned above in the state-change branch; needed for the other branches */
1053         pp->state = newstate;
1054
1055         /*
1056          * path prio refreshing
1057          */
1058         condlog(4, "path prio refresh");
1059         oldpriority = pp->priority;
1060         pathinfo(pp, conf->hwtable, DI_PRIO);
1061
1062         /*
1063          * pathgroup failback policy
1064          */
1065         if (pp->priority != oldpriority &&
1066             pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio)
1067                 update_path_groups(pp->mpp, vecs);
1068         else if (need_switch_pathgroup(pp->mpp, 0)) {
                     /* arm a deferred failback only if none is already pending */
1069                 if (pp->mpp->pgfailback > 0 &&
1070                     pp->mpp->failback_tick <= 0)
1071                         pp->mpp->failback_tick =
1072                                 pp->mpp->pgfailback + 1;
1073                 else if (pp->mpp->pgfailback ==
1074                                 -FAILBACK_IMMEDIATE)
1075                         switch_pathgroup(pp->mpp);
1076         }
1077 }













libmultipath/discovery.c
==========================

/*
 * get_state - determine the current state of a path.
 *
 * pp:     the path to examine.
 * daemon: when non-zero, the sysfs path info is refreshed before a checker
 *         is selected, and the checker is switched to async mode before it
 *         is run.
 *
 * Returns PATH_UNCHECKED when no checker can be selected or initialised,
 * the result of path_offline() when that is anything other than PATH_UP,
 * and otherwise the result of checker_check().
 */
829 int
830 get_state (struct path * pp, int daemon)
831 {
832         struct checker * c = &pp->checker;
833         int state;
834
835         condlog(3, "%s: get_state", pp->dev);
836
            /* first use of this path: select and initialise its checker */
837         if (!checker_selected(c)) {
838                 if (daemon)
839                         pathinfo(pp, conf->hwtable, DI_SYSFS);
840                 select_checker(pp);
841                 if (!checker_selected(c)) {
842                         condlog(3, "%s: No checker selected", pp->dev);
843                         return PATH_UNCHECKED;
844                 }
845                 checker_set_fd(c, pp->fd);
846                 if (checker_init(c, pp->mpp?&pp->mpp->mpcontext:NULL)) {
847                         condlog(3, "%s: checker init failed", pp->dev);
848                         return PATH_UNCHECKED;
849                 }
850         }
851         state = path_offline(pp);           /// sysfs-reported device state; the checker below runs only if this is PATH_UP
852         if (state != PATH_UP) {
853                 condlog(3, "%s: path inaccessible", pp->dev);
854                 return state;
855         }
856         if (daemon)
857                 checker_set_async(c);
            /* no timeout configured: presumably take it from sysfs into c->timeout -- TODO confirm */
858         if (!conf->checker_timeout)
859                 sysfs_get_timeout(pp->sysdev, &(c->timeout));
            /* run the selected checker; its result becomes the path state */
860         state = checker_check(c);
861         condlog(3, "%s: state = %i", pp->dev, state);
862         if (state == PATH_DOWN && strlen(checker_message(c)))
863                 condlog(3, "%s: checker msg is \"%s\"",
864                         pp->dev, checker_message(c));
865         return state;
866 }





/*
 * path_offline - translate the sysfs device state of a path into a PATH_* value.
 *
 * Looks up the sysfs device for pp (stored in pp->sysdev), picks its parent
 * (skipping one further level when the parent's kernel name starts with
 * "block"), reads that device's state string and maps it:
 *
 *   "offline" -> PATH_DOWN, and pp->offline is set to 1
 *   "blocked" -> PATH_PENDING
 *   "running" -> PATH_UP
 *   anything else -> PATH_DOWN
 *
 * pp->offline is cleared to 0 for every state other than "offline".
 * Returns PATH_WILD when the sysfs device, its parent or the state string
 * cannot be obtained.
 */
703 int
704 path_offline (struct path * pp)
705 {
706         struct sysfs_device * parent;
707         char buff[SCSI_STATE_SIZE];
708
709         pp->sysdev = sysfs_device_from_path(pp);
710         if (!pp->sysdev) {
711                 condlog(1, "%s: failed to get sysfs information", pp->dev);
712                 return PATH_WILD;
713         }
714
            /* fall back to the device itself when it has no parent */
715         parent = sysfs_device_get_parent(pp->sysdev);
716         if (!parent)
717                 parent = pp->sysdev;
            /* prefix compare: a "block..." node is skipped in favour of its parent */
718         if (parent && !strncmp(parent->kernel, "block",5))
719                 parent = sysfs_device_get_parent(parent);
720         if (!parent) {
721                 condlog(1, "%s: failed to get parent", pp->dev);
722                 return PATH_WILD;
723         }
724
725         if (sysfs_get_state(parent, buff, SCSI_STATE_SIZE))
726                 return PATH_WILD;
727
728         condlog(3, "%s: state = %s", pp->dev, buff);
729
            /* only the first 7 characters of the state string are compared */
730         if (!strncmp(buff, "offline", 7)) {
731                 pp->offline = 1;
732                 return PATH_DOWN;
733         }
734         pp->offline = 0;
735         if (!strncmp(buff, "blocked", 7))
736                 return PATH_PENDING;
737         else if (!strncmp(buff, "running", 7))
738                 return PATH_UP;
739
            /* any other state string is treated as down */
740         return PATH_DOWN;
741 }


其实根据上面那个函数,大概意思就是从 Linux 内核的 sysfs 文件系统提供的信息里读出磁盘状态。比如我自己电脑上磁盘对应的 device 状态:
cat /sys/block/sda/device/state
running

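它好像并没有通过发送磁盘命令(比如 SCSI 指令)来探测磁盘状态?可能这个文件里的状态也是比较准确的吧:我知道对于 SCSI 磁盘,如果驱动执行命令不成功,Linux 的 SCSI 驱动会自动更新 sysfs 里这个文件的状态。使用 sysfs 文件可能也是为了统一代码,使其与底层硬件无关吧。不过从上面的 get_state 可以看到,path_offline 返回 PATH_UP 之后还会调用 checker_check(c),也就是说 sysfs 状态只是第一步,进一步的探测可能是由所选的 checker 完成的。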


  评论这张
 
阅读(1545)| 评论(0)
推荐 转载

历史上的今天

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2017