注册 登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

gmd20的个人空间

// 编程和生活

 
 
 

日志

 
 

Linux平台实现负载均衡(load balancing)的一种方案  

2009-10-26 17:52:40|  分类: linux相关 |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |
        使用 Linux Virtual Server (http://www.linuxvirtualserver.org/) 来做 load balancer 是可以的,好像 IBM 的集群系统用的也是 Linux Virtual Server。ipvsadm 命令就是用来管理和配置 Linux Virtual Server 的,可以查一下这个命令的用法 (http://linuxcommand.org/man_pages/ipvsadm8.html)。

     负载均衡其实就是把连到一台机器上的固定协议和端口的网络连接,按照一定的策略转发到其他几台机器上去而已。因为前几天一直在看 iptables 和 netfilter 的东西,所以我觉得用 iptables -t nat 命令提供的 NAT 功能就可以做到,不过可能需要不停地修改 NAT 规则。用法可以看看 iptables 的 DNAT 和 SNAT 相关的内容。不过这样做应该是很麻烦的。

       所以就想知道这个 load balancing 到底是怎么实现的。了解到很多实际的应用使用的是 "Linux Virtual Server" 之后,就去看了看它具体是怎么做的。根据官方文档可以知道,关键技术是 “IPVS (IP Virtual Server)”:
 http://kb.linuxvirtualserver.org/wiki/IPVS  
 http://www.linuxvirtualserver.org/software/ipvs.html


IPVS 的实现可以在 Linux 的内核源代码里面找到: http://lxr.linux.no/#linux+v2.6.31/net/netfilter/ipvs/

最初的实现还是一位有中文名字的开发者贡献的。呵呵,看到国人很亲切啊!

大概浏览了一下源代码,其实就是注册几个 netfilter 的 hook 函数,然后对数据包做 DNAT 和 SNAT 的处理。不过为了方便管理连接,定义了一些结构而已。很多代码都是为用户层交互和配置提供的接口。其实看看别人怎么组织源代码和设计思想,也是可以学到很多东西的。

   
 http://lxr.linux.no/#linux+v2.6.31/include/net/ip_vs.h  里面可以看到结构定义,例如:

 336/*
 337 *      IP_VS structure allocated for each dynamically scheduled connection
 338 */
 339struct ip_vs_conn {
 340        struct list_head        c_list;         /* hashed list heads */
 341
 342        /* Protocol, addresses and port numbers */
 343        u16                      af;            /* address family */
 344        union nf_inet_addr       caddr;          /* client address */
 345        union nf_inet_addr       vaddr;          /* virtual address */
 346        union nf_inet_addr       daddr;          /* destination address */
 347        __be16                   cport;
 348        __be16                   vport;
 349        __be16                   dport;
 350        __u16                   protocol;       /* Which protocol (TCP/UDP) */
 351
 352        /* counter and timer */
 353        atomic_t                refcnt;         /* reference count */
 354        struct timer_list       timer;          /* Expiration timer */
 355        volatile unsigned long  timeout;        /* timeout */
 356
 357        /* Flags and state transition */
 358        spinlock_t              lock;           /* lock for state transition */
 359        volatile __u16          flags;          /* status flags */
 360        volatile __u16          state;          /* state info */
 361        volatile __u16          old_state;      /* old state, to be used for
 362                                                 * state transition triggerd
 363                                                 * synchronization
 364                                                 */
 365
 366        /* Control members */
 367        struct ip_vs_conn       *control;       /* Master control connection */
 368        atomic_t                n_control;      /* Number of controlled ones */
 369        struct ip_vs_dest       *dest;          /* real server */
 370        atomic_t                in_pkts;        /* incoming packet counter */
 371
 372        /* packet transmitter for different forwarding methods.  If it
 373           mangles the packet, it must return NF_DROP or better NF_STOLEN,
 374           otherwise this must be changed to a sk_buff **.
 375         */
 376        int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
 377                           struct ip_vs_protocol *pp);
 378
 379        /* Note: we can group the following members into a structure,
 380           in order to save more space, and the following members are
 381           only used in VS/NAT anyway */
 382        struct ip_vs_app        *app;           /* bound ip_vs_app object */
 383        void                    *app_data;      /* Application private data */
 384        struct ip_vs_seq        in_seq;         /* incoming seq. struct */
 385        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 386};
 387



 428/*
 429 *      The information about the virtual service offered to the net
 430 *      and the forwarding entries
 431 */
 432struct ip_vs_service {
 433        struct list_head        s_list;   /* for normal service table */
 434        struct list_head        f_list;   /* for fwmark-based service table */
 435        atomic_t                refcnt;   /* reference counter */
 436        atomic_t                usecnt;   /* use counter */
 437
 438        u16                     af;       /* address family */
 439        __u16                   protocol; /* which protocol (TCP/UDP) */
 440        union nf_inet_addr      addr;     /* IP address for virtual service */
 441        __be16                  port;     /* port number for the service */
 442        __u32                   fwmark;   /* firewall mark of the service */
 443        unsigned                flags;    /* service status flags */
 444        unsigned                timeout;  /* persistent timeout in ticks */
 445        __be32                  netmask;  /* grouping granularity */
 446
 447        struct list_head        destinations;  /* real server d-linked list */
 448        __u32                   num_dests;     /* number of servers */
 449        struct ip_vs_stats      stats;         /* statistics for the service */
 450        struct ip_vs_app        *inc;     /* bind conns to this app inc */
 451
 452        /* for scheduling */
 453        struct ip_vs_scheduler  *scheduler;    /* bound scheduler object */
 454        rwlock_t                sched_lock;    /* lock sched_data */
 455        void                    *sched_data;   /* scheduler application data */
 456};
 457
 458
 459/*
 460 *      The real server destination forwarding entry
 461 *      with ip address, port number, and so on.
 462 */
 463struct ip_vs_dest {
 464        struct list_head        n_list;   /* for the dests in the service */
 465        struct list_head        d_list;   /* for table with all the dests */
 466
 467        u16                     af;             /* address family */
 468        union nf_inet_addr      addr;           /* IP address of the server */
 469        __be16                  port;           /* port number of the server */
 470        volatile unsigned       flags;          /* dest status flags */
 471        atomic_t                conn_flags;     /* flags to copy to conn */
 472        atomic_t                weight;         /* server weight */
 473
 474        atomic_t                refcnt;         /* reference counter */
 475        struct ip_vs_stats      stats;          /* statistics */
 476
 477        /* connection counters and thresholds */
 478        atomic_t                activeconns;    /* active connections */
 479        atomic_t                inactconns;     /* inactive connections */
 480        atomic_t                persistconns;   /* persistent connections */
 481        __u32                   u_threshold;    /* upper threshold */
 482        __u32                   l_threshold;    /* lower threshold */
 483
 484        /* for destination cache */
 485        spinlock_t              dst_lock;       /* lock of dst_cache */
 486        struct dst_entry        *dst_cache;     /* destination cache entry */
 487        u32                     dst_rtos;       /* RT_TOS(tos) for dst */
 488
 489        /* for virtual service */
 490        struct ip_vs_service    *svc;           /* service it belongs to */
 491        __u16                   protocol;       /* which protocol (TCP/UDP) */
 492        union nf_inet_addr      vaddr;          /* virtual IP address */
 493        __be16                  vport;          /* virtual port number */
 494        __u32                   vfwmark;        /* firewall mark of service */
 495};
 496



这些结构应该都是用来管理“发往哪些 IP 和端口的数据,要负载均衡(转发)到哪些 IP 和端口去”的吧。


整个代码可以从 http://lxr.linux.no/#linux+v2.6.31/net/netfilter/ipvs/ip_vs_core.c#L1403
处注册的几个 hook 开始看,分析一下 SNAT 和 DNAT 功能是怎么实现的就可以了。感觉也不是很难理解,需要研究的时候再仔细看看吧。
如果仅仅是为了使用,了解一下负载均衡的概念和 ipvsadm 命令的用法就可以了。

1403static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1404        /* After packet filtering, forward packet through VS/DR, VS/TUN,
1405         * or VS/NAT(change destination), so that filtering rules can be
1406         * applied to IPVS. */
1407        {
1408                .hook           = ip_vs_in,    /* 去看看这些hook函数的实现就很容易读懂代码了 */
1409                .owner          = THIS_MODULE,
1410                .pf             = PF_INET,
1411                .hooknum        = NF_INET_LOCAL_IN,
1412                .priority       = 100,
1413        },
1414        /* After packet filtering, change source only for VS/NAT */
1415        {
1416                .hook           = ip_vs_out,   /* 去看看这些hook函数的实现就很容易读懂代码了 */
1417                .owner          = THIS_MODULE,
1418                .pf             = PF_INET,
1419                .hooknum        = NF_INET_FORWARD,
1420                .priority       = 100,
1421        },
1422        /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1423         * destined for 0.0.0.0/0, which is for incoming IPVS connections */
1424        {
1425                .hook           = ip_vs_forward_icmp,
1426                .owner          = THIS_MODULE,
1427                .pf             = PF_INET,
1428                .hooknum        = NF_INET_FORWARD,
1429                .priority       = 99,
1430        },
1431        /* Before the netfilter connection tracking, exit from POST_ROUTING */
1432        {
1433                .hook           = ip_vs_post_routing,
1434                .owner          = THIS_MODULE,
1435                .pf             = PF_INET,
1436                .hooknum        = NF_INET_POST_ROUTING,
1437                .priority       = NF_IP_PRI_NAT_SRC-1,
1438        },
1439#ifdef CONFIG_IP_VS_IPV6
1440        /* After packet filtering, forward packet through VS/DR, VS/TUN,
1441         * or VS/NAT(change destination), so that filtering rules can be
1442         * applied to IPVS. */
1443        {
1444                .hook           = ip_vs_in,
1445                .owner          = THIS_MODULE,
1446                .pf             = PF_INET6,
1447                .hooknum        = NF_INET_LOCAL_IN,
1448                .priority       = 100,
1449        },
1450        /* After packet filtering, change source only for VS/NAT */
1451        {
1452                .hook           = ip_vs_out,
1453                .owner          = THIS_MODULE,
1454                .pf             = PF_INET6,
1455                .hooknum        = NF_INET_FORWARD,
1456                .priority       = 100,
1457        },
1458        /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1459         * destined for 0.0.0.0/0, which is for incoming IPVS connections */
1460        {
1461                .hook           = ip_vs_forward_icmp_v6,
1462                .owner          = THIS_MODULE,
1463                .pf             = PF_INET6,
1464                .hooknum        = NF_INET_FORWARD,
1465                .priority       = 99,
1466        },
1467        /* Before the netfilter connection tracking, exit from POST_ROUTING */
1468        {
1469                .hook           = ip_vs_post_routing,
1470                .owner          = THIS_MODULE,
1471                .pf             = PF_INET6,
1472                .hooknum        = NF_INET_POST_ROUTING,
1473                .priority       = NF_IP6_PRI_NAT_SRC-1,
1474        },
1475#endif
1476};


widebright的学习日记 ,呵呵

  评论这张
 
阅读(731)| 评论(0)
推荐 转载

历史上的今天

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2017