IPv4创建路由fib_info结构

内核维护了两个fib_info结构的哈希桶,其中fib_info_hash以fib_info结构中几个关键成员计算出的哈希值为哈希桶的索引;而fib_info_laddrhash仅以本地地址来计算哈希桶的索引值。另外,fib_info_cnt表示系统中当前fib_info结构的数量,fib_info_hash_size表示系统中fib_info哈希桶的大小。

另外,全局哈希桶fib_info_devhash,大小为256,其中链表链接的为fib_info中的内嵌下一跳结构,此哈希桶以设备ID为哈希值,在设备状态改变时,需要操作此链表。

static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_cnt;#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

fib_info结构创建

首先检查类型fc_type值的合法性,其次根据类型值检查配置的scope值是否合法,内核数组fib_props指定了路由类型对应的最小的合法scope值,内核根据此数组进行判定。例如路由本地类型RTN_LOCAL的scope不能小于RT_SCOPE_HOST,否则就成为外部路由了。之后,配置的路由标志不能包含RTNH_F_DEAD或者RTNH_F_LINKDOWN,这两个标志由内核自身使用。

struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack)
{struct fib_info *fi = NULL;struct nexthop *nh = NULL;struct fib_info *ofi;int nhs = 1;struct net *net = cfg->fc_nlinfo.nl_net;if (cfg->fc_type > RTN_MAX)goto err_inval;/* Fast check to catch the most weird cases */if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {NL_SET_ERR_MSG(extack, "Invalid scope");goto err_inval;}if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {NL_SET_ERR_MSG(extack, "Invalid rtm_flags - can not contain DEAD or LINKDOWN");goto err_inval;}

如果此路由项指定了下一跳ID,在没有指定metrics的情况下,在命名空间中查找是否存在可用的fib_info,找到的话返回此fi。反之,在指定metrics时,总是尝试创建新的fib_info结构,这里查询指定的下一跳ID是否存在。

对于多路径路由,函数fib_count_nexthops计算配置的路径数量。

    /*
     * Route refers to a nexthop object by id. Without metrics an existing
     * fib_info may be reused directly; otherwise only verify that the id
     * exists. nhs becomes 0 because no embedded fib_nh is needed.
     */
    if (cfg->fc_nh_id) {
        if (!cfg->fc_mx) {
            fi = fib_find_info_nh(net, cfg);
            if (fi) {
                fi->fib_treeref++;
                return fi;
            }
        }

        nh = nexthop_find_by_id(net, cfg->fc_nh_id);
        if (!nh) {
            NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
            goto err_inval;
        }
        nhs = 0;
    }

    /* The #ifdef must sit on its own line; it was fused with the if below. */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
    if (cfg->fc_mp) {
        /* A count of 0 means the multipath payload was malformed. */
        nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
        if (nhs == 0)
            goto err_inval;
    }
#endif

如果fib_info数量超过哈希桶大小,接下来会发生(可能已经发生)哈希冲突,这里扩充哈希桶大小。在第一次创建fib_info结构时,fib_info_cnt和fib_info_hash_size的值都是零,将哈希桶的大小设置为16。

后续每次fib_info_cnt数量超过或者等于哈希桶的大小时,将哈希桶的大小每次增大一倍,函数fib_info_hash_move负责将旧的哈希桶中的fib_info,重新链接到新分配的哈希桶。

    /*
     * Grow both hash tables once the entry count reaches the bucket count:
     * 16 buckets on first use, doubled afterwards.
     */
    err = -ENOBUFS;
    if (fib_info_cnt >= fib_info_hash_size) {
        unsigned int new_size = fib_info_hash_size << 1;
        struct hlist_head *new_info_hash;
        struct hlist_head *new_laddrhash;
        unsigned int bytes;

        if (!new_size)
            new_size = 16;
        bytes = new_size * sizeof(struct hlist_head *);
        new_info_hash = fib_info_hash_alloc(bytes);
        new_laddrhash = fib_info_hash_alloc(bytes);
        if (!new_info_hash || !new_laddrhash) {
            /* Release whichever of the two allocations succeeded. */
            fib_info_hash_free(new_info_hash, bytes);
            fib_info_hash_free(new_laddrhash, bytes);
        } else
            fib_info_hash_move(new_info_hash, new_laddrhash, new_size);

        /*
         * A failed resize is only fatal when there is no table at all;
         * otherwise the existing, smaller table keeps being used.
         */
        if (!fib_info_hash_size)
            goto failure;
    }

对于内嵌下一跳的路由,下一跳结构fib_nh的数量(nhs)为1(多径路由时大于1),而对于指定下一跳ID的路由,fib_info中fib_nh的数量为0,不需要在此分配。分配fib_info结构之后,递增fib_info_cnt计数,进行必要的初始化。

    /* Allocate the fib_info together with its nhs trailing fib_nh entries. */
    fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
    if (!fi)
        goto failure;
    /*
     * NOTE(review): fi->fib_net is only assigned a few lines below, so the
     * value passed here is whatever kzalloc left in it (zero) - confirm that
     * ip_fib_metrics_init() tolerates this.
     */
    fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, extack);
    if (IS_ERR(fi->fib_metrics)) {
        /* fib_info_cnt has not been bumped yet, so a plain kfree() is used. */
        err = PTR_ERR(fi->fib_metrics);
        kfree(fi);
        return ERR_PTR(err);
    }

    fib_info_cnt++;
    /* Copy the route attributes from the configuration. */
    fi->fib_net = net;
    fi->fib_protocol = cfg->fc_protocol;
    fi->fib_scope = cfg->fc_scope;
    fi->fib_flags = cfg->fc_flags;
    fi->fib_priority = cfg->fc_priority;
    fi->fib_prefsrc = cfg->fc_prefsrc;
    fi->fib_type = cfg->fc_type;
    fi->fib_tb_id = cfg->fc_table;

如果配置的路由指定了下一跳ID,取得下一跳nexthop结构(递增引用计数),赋值给fib_info结构成员nh。否则,如果配置的路由为内置下一跳地址,以上在分配fib_info结构时,分配了一个fib_nh下一跳结构(多径路由时nhs的值大于1),change_nexthops循环执行一次,将下一跳的nh_parent指针指向fib_info结构。

如果配置的路由为多径路由(fc_mp),change_nexthops执行多次,将所有的下一跳的nh_parent指针都指向新创建的fib_info结构。函数fib_get_nhs遍历所有的下一跳,进行相应的初始化。

对于非多径路由,由函数fib_nh_init初始化唯一的下一跳结构fib_nh。

    fi->fib_nhs = nhs;
    if (nh) {
        /* Nexthop object: take a reference; failure means it is going away. */
        if (!nexthop_get(nh)) {
            NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
            err = -EINVAL;
        } else {
            err = 0;
            fi->nh = nh;
        }
    } else {
        /* Embedded nexthops: point each one back at its owning fib_info. */
        change_nexthops(fi) {
            nexthop_nh->nh_parent = fi;
        } endfor_nexthops(fi)

        /* "else" was fused with the following statement in the original text. */
        if (cfg->fc_mp)
            err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
        else
            err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
    }

    if (err != 0)
        goto failure;

对于特殊路由,如RTN_BLACKHOLE、RTN_UNREACHABLE和RTN_PROHIBIT等类型,不能指定网关、出口设备和多径属性。配置路由的scope值不能大于RT_SCOPE_HOST。值RT_SCOPE_NOWHERE由内核使用。

    if (fib_props[cfg->fc_type].error) {
        /*
         * Route types that carry an error code in fib_props[] take no
         * gateway, device or multipath data and skip the nexthop checks.
         */
        if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
            NL_SET_ERR_MSG(extack, "Gateway, device and multipath can not be specified for this route type");
            goto err_inval;
        }
        goto link_it;
    } else {
        /* Only these forwarding route types are accepted beyond this point. */
        switch (cfg->fc_type) {
        case RTN_UNICAST:
        case RTN_LOCAL:
        case RTN_BROADCAST:
        case RTN_ANYCAST:
        case RTN_MULTICAST:
            break;
        default:
            NL_SET_ERR_MSG(extack, "Invalid route type");
            goto err_inval;
        }
    }

    /* Scopes numerically above RT_SCOPE_HOST are rejected. */
    if (cfg->fc_scope > RT_SCOPE_HOST) {
        NL_SET_ERR_MSG(extack, "Invalid scope");
        goto err_inval;
    }

继续进行scope的合法性检查,对于RT_SCOPE_HOST路由,不能配置多路径,也不能指定网关。

    if (fi->nh) {
        /* Nexthop object: validate it against the requested scope. */
        err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack);
        if (err)
            goto failure;
    } else if (cfg->fc_scope == RT_SCOPE_HOST) {
        /* Note: this local nh (a fib_nh) shadows the outer struct nexthop *nh. */
        struct fib_nh *nh = fi->fib_nh;

        /* Local address is added. */
        if (nhs != 1) {
            NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
            goto err_inval;
        }
        if (nh->fib_nh_gw_family) {
            NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway");
            goto err_inval;
        }
        nh->fib_nh_scope = RT_SCOPE_NOWHERE;
        nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
        /* err is preset so the failure exit reports -ENODEV on a failed lookup. */
        err = -ENODEV;
        if (!nh->fib_nh_dev)
            goto failure;
    } else {
        int linkdown = 0;

        /* Validate every embedded nexthop and count those flagged LINKDOWN. */
        change_nexthops(fi) {
            err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh, cfg->fc_table, cfg->fc_scope, extack);
            if (err != 0)
                goto failure;
            if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
                linkdown++;
        } endfor_nexthops(fi)

        /* The route as a whole is LINKDOWN only when all its nexthops are. */
        if (linkdown == fi->fib_nhs)
            fi->fib_flags |= RTNH_F_LINKDOWN;
    }

如果配置了优选源地址,函数fib_valid_prefsrc检查此地址的合法性。

    /* A preferred source address, when given, must pass fib_valid_prefsrc(). */
    if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
        NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
        goto err_inval;
    }

    /* Embedded nexthops only: nothing to do here for nexthop objects. */
    if (!fi->nh) {
        change_nexthops(fi) {
            fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common, fi->fib_scope);
            /* Record that at least one nexthop has an IPv6 gateway. */
            if (nexthop_nh->fib_nh_gw_family == AF_INET6)
                fi->fib_nh_is_v6 = true;
        } endfor_nexthops(fi)

        fib_rebalance(fi);
    }

查找fib_info_hash哈希桶的相应链表,是否存在相同的fib_info结构,如果存在释放新创建的fib_info,返回旧值。否则,将新创建的fib_info结构链接到全局的fib_info_hash哈希桶,如果配置了优选源地址,同时将其链接到全局fib_info_laddrhash哈希桶链表中(链表头由哈希函数fib_laddr_hashfn确定)。

link_it:
    /* Reuse an equivalent existing fib_info and drop the one just built. */
    ofi = fib_find_info(fi);
    if (ofi) {
        fi->fib_dead = 1;
        free_fib_info(fi);
        ofi->fib_treeref++;
        return ofi;
    }

    /* New entry: take the initial references and publish it in the tables. */
    fi->fib_treeref++;
    refcount_set(&fi->fib_clntref, 1);
    /* fib_info_lock is released further down, after the nexthops are linked. */
    spin_lock_bh(&fib_info_lock);
    hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]);
    if (fi->fib_prefsrc) {
        struct hlist_head *head;

        head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
        hlist_add_head(&fi->fib_lhash, head);
    }

如果路由配置了下一跳ID,将此fib_info结构链接到nexthop链表,所有使用此下一跳ID的fib_info组成一个链表。反之,对于内置下一跳的路由,使用下一跳出接口索引作为哈希值,在哈希桶fib_info_devhash中确定链表头,将内置的下一跳fib_nh链接到链表上。

    if (fi->nh) {list_add(&fi->nh_list, &nh->fi_list);} else {change_nexthops(fi) {struct hlist_head *head;unsigned int hash;if (!nexthop_nh->fib_nh_dev)continue;hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);head = &fib_info_devhash[hash];hlist_add_head(&nexthop_nh->nh_hash, head);} endfor_nexthops(fi)}spin_unlock_bh(&fib_info_lock);return fi;err_inval:err = -EINVAL;failure:if (fi) {fi->fib_dead = 1;free_fib_info(fi);}

fib_info全局链表扩展

首先,遍历旧的fib_info_hash哈希桶(大小为old_size),以及其中的每个链表,将其中的fib_info结构重新链接到新的哈希桶链表上(new_info_hash)。

/*
 * Move every fib_info from the current hash tables into newly allocated,
 * larger ones, then free the old tables.
 *
 * @new_info_hash: replacement for fib_info_hash
 * @new_laddrhash: replacement for fib_info_laddrhash
 * @new_size:      number of buckets in each new table
 */
static void fib_info_hash_move(struct hlist_head *new_info_hash,struct hlist_head *new_laddrhash, unsigned int new_size)
{
    struct hlist_head *old_info_hash, *old_laddrhash;
    unsigned int old_size = fib_info_hash_size;
    unsigned int i, bytes;

    spin_lock_bh(&fib_info_lock);
    old_info_hash = fib_info_hash;
    old_laddrhash = fib_info_laddrhash;
    /*
     * Updated before rehashing - presumably so the hash helpers already
     * reduce into the new table size; confirm against fib_info_hashfn().
     */
    fib_info_hash_size = new_size;

    /* Walk the old buckets; the _safe iterator allows relinking each entry. */
    for (i = 0; i < old_size; i++) {
        struct hlist_head *head = &fib_info_hash[i];
        struct hlist_node *n;
        struct fib_info *fi;

        hlist_for_each_entry_safe(fi, n, head, fib_hash) {
            struct hlist_head *dest;
            unsigned int new_hash;

            new_hash = fib_info_hashfn(fi);
            dest = &new_info_hash[new_hash];
            hlist_add_head(&fi->fib_hash, dest);
        }
    }
    fib_info_hash = new_info_hash;

其次,遍历旧的fib_info_laddrhash哈希桶,将链表中的fib_info结构重新链接到新的new_laddrhash哈希桶中的链表上。最后,释放旧的哈希桶old_info_hash和old_laddrhash。

    /* Same relinking pass for the table keyed by preferred source address. */
    for (i = 0; i < old_size; i++) {
        struct hlist_head *lhead = &fib_info_laddrhash[i];
        struct hlist_node *n;
        struct fib_info *fi;

        hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
            struct hlist_head *ldest;
            unsigned int new_hash;

            new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
            ldest = &new_laddrhash[new_hash];
            hlist_add_head(&fi->fib_lhash, ldest);
        }
    }
    fib_info_laddrhash = new_laddrhash;

    spin_unlock_bh(&fib_info_lock);

    /* The old tables are freed after the lock has been dropped. */
    bytes = old_size * sizeof(struct hlist_head *);
    fib_info_hash_free(old_info_hash, bytes);
    fib_info_hash_free(old_laddrhash, bytes);
}

多路径路由

函数fib_count_nexthops计算netlink消息中下一跳rtnexthop的数量,即路径数量。

/*
 * Step to the rtnexthop entry that follows @rtnh.
 *
 * @rtnh:      current entry
 * @remaining: bytes left in the buffer; reduced by the aligned length of @rtnh
 *
 * Returns a pointer just past the current entry. No bounds check is done
 * here; the caller decides whether the result is usable.
 */
static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh, int *remaining)
{
    const int step = NLA_ALIGN(rtnh->rtnh_len);
    const char *base = (const char *) rtnh;

    *remaining = *remaining - step;
    return (struct rtnexthop *) (base + step);
}
/*
 * Count the rtnexthop entries in a multipath payload.
 *
 * @rtnh:      first entry of the payload
 * @remaining: payload length in bytes
 * @extack:    receives an error message when trailing data is found
 *
 * Returns the number of entries, or 0 when bytes are left over after the
 * last entry accepted by rtnh_ok().
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, struct netlink_ext_ack *extack)
{
    int count;

    for (count = 0; rtnh_ok(rtnh, remaining); count++)
        rtnh = rtnh_next(rtnh, &remaining);

    /* leftover implies invalid nexthop configuration, discard it */
    if (remaining <= 0)
        return count;

    NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - extra data after nexthops");
    return 0;
}

函数fib_get_nhs仅对于配置的内嵌下一跳的路由有效。以下遍历分配的所有下一跳结构,根据netlink消息中的配置值进行初始化。

/*
 * Initialise the embedded nexthops of @fi from a multipath payload.
 *
 * @fi:        fib_info whose fib_nh entries are filled in
 * @rtnh:      first rtnexthop entry of the payload
 * @remaining: payload length in bytes
 * @cfg:       route-level configuration
 * @extack:    netlink extended ack used to report errors
 *
 * Returns 0 on success or a negative errno.
 */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack)
{
    struct net *net = fi->fib_net;
    struct fib_config fib_cfg;
    struct fib_nh *nh;
    int ret;    /* result of the helper calls below; was used but never declared */

    change_nexthops(fi) {
        int attrlen;

        /* Each nexthop gets its own zeroed scratch configuration. */
        memset(&fib_cfg, 0, sizeof(fib_cfg));

        if (!rtnh_ok(rtnh, remaining)) {
            NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - extra data after nexthop");
            return -EINVAL;
        }

        /* DEAD and LINKDOWN may not be supplied per nexthop either. */
        if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
            NL_SET_ERR_MSG(extack, "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
            return -EINVAL;
        }

取出每个下一跳中指定的网关,可使用gateway或者via指定网关,但是不能同时使用两者。以及,取出指定的FLOW和ENCAP属性字段,由函数fib_nh_init初始化下一跳结构。

        /* Low 8 flag bits come from this nexthop, the rest from the route. */
        fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
        fib_cfg.fc_oif = rtnh->rtnh_ifindex;
        attrlen = rtnh_attrlen(rtnh);
        if (attrlen > 0) {
            struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);

            /* The gateway may come from RTA_GATEWAY or RTA_VIA, never both. */
            nla = nla_find(attrs, attrlen, RTA_GATEWAY);
            nlav = nla_find(attrs, attrlen, RTA_VIA);
            if (nla && nlav) {
                NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA");
                return -EINVAL;
            }
            if (nla) {
                /* A zero IPv4 gateway leaves fc_gw_family unset. */
                fib_cfg.fc_gw4 = nla_get_in_addr(nla);
                if (fib_cfg.fc_gw4)
                    fib_cfg.fc_gw_family = AF_INET;
            } else if (nlav) {
                ret = fib_gw_from_via(&fib_cfg, nlav, extack);
                if (ret)
                    goto errout;
            }

            nla = nla_find(attrs, attrlen, RTA_FLOW);
            if (nla)
                fib_cfg.fc_flow = nla_get_u32(nla);

            fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
            nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
            if (nla)
                fib_cfg.fc_encap_type = nla_get_u16(nla);
        }

        /* rtnh_hops + 1 is presumably the nexthop weight - TODO confirm. */
        ret = fib_nh_init(net, nexthop_nh, &fib_cfg, rtnh->rtnh_hops + 1, extack);
        if (ret)
            goto errout;

        rtnh = rtnh_next(rtnh, &remaining);
    } endfor_nexthops(fi);

如果配置了出口设备,并且出口设备与第一个下一跳路径配置的设备不相等,返回错误。如果配置了网关,但是首个下一跳路径没有配置网关,或者首个下一跳路径配置的网关与指定的全局网关不相等,返回错误。

以上可见,如果指定全局的出口设备和网关,第一个下一跳路径的出口设备和网关必须与之相等。对于flow值,有相同的限制。

    ret = -EINVAL;nh = fib_info_nh(fi, 0);if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {NL_SET_ERR_MSG(extack, "Nexthop device index does not match RTA_OIF");goto errout;}if (cfg->fc_gw_family) {if (cfg->fc_gw_family != nh->fib_nh_gw_family ||(cfg->fc_gw_family == AF_INET &&nh->fib_nh_gw4 != cfg->fc_gw4) ||(cfg->fc_gw_family == AF_INET6 &&ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {NL_SET_ERR_MSG(extack, "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");goto errout;}}
#ifdef CONFIG_IP_ROUTE_CLASSIDif (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {NL_SET_ERR_MSG(extack, "Nexthop class id does not match RTA_FLOW");goto errout;}
#endif

内核版本 5.10


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部