路由FIB通知链
在网络命名空间初始化时,初始化fib通知链操作链表fib_notifier_ops以及通知链头fib_chain。
static int __net_init fib_notifier_net_init(struct net *net)
{struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);INIT_LIST_HEAD(&fn_net->fib_notifier_ops);ATOMIC_INIT_NOTIFIER_HEAD(&fn_net->fib_chain);
对于IPv4,在网络命名空间初始化时,注册fib通知链处理结构fib4_notifier_ops_template。
static const struct fib_notifier_ops fib4_notifier_ops_template = {.family = AF_INET,.fib_seq_read = fib4_seq_read,.fib_dump = fib4_dump,.owner = THIS_MODULE,
};int __net_init fib4_notifier_init(struct net *net)
{struct fib_notifier_ops *ops;net->ipv4.fib_seq = 0;ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);if (IS_ERR(ops))return PTR_ERR(ops);net->ipv4.notifier_ops = ops;
FIB下一跳通知链调用
当出现网络设备down,设备注销,设备物理链路状态改变或者删除设备IP地址等情况时,需要更新所有以此设备为下一跳的表项,并且调用call_fib_nh_notifiers发送下一跳改变的消息到fib通知链,消息类型为FIB_EVENT_NH_DEL。
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{unsigned int hash = fib_devindex_hashfn(dev->ifindex);struct hlist_head *head = &fib_info_devhash[hash];struct fib_nh *nh;hlist_for_each_entry(nh, head, nh_hash) {struct fib_info *fi = nh->nh_parent;...change_nexthops(fi) {if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)dead++;else if (nexthop_nh->fib_nh_dev == dev &&nexthop_nh->fib_nh_scope != scope) {switch (event) {case NETDEV_DOWN:case NETDEV_UNREGISTER:nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;fallthrough;case NETDEV_CHANGE:nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;break;}call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_DEL);
与以上的事件相反,当出现网络设备up,设备物理链路状态改变或者添加设备IP地址等情况时,也需要更新设备相关的下一跳信息。清除下一跳中的RTNH_F_LINKDOWN和/或RTNH_F_DEAD标志,并且发送消息到fib通知链,类型为FIB_EVENT_NH_ADD。
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{struct fib_info *prev_fi;struct hlist_head *head;struct fib_nh *nh;if (!(dev->flags & IFF_UP)) return 0;if (nh_flags & RTNH_F_DEAD) {unsigned int flags = dev_get_flags(dev);if (flags & (IFF_RUNNING | IFF_LOWER_UP))nh_flags |= RTNH_F_LINKDOWN;}hash = fib_devindex_hashfn(dev->ifindex);head = &fib_info_devhash[hash];hlist_for_each_entry(nh, head, nh_hash) {struct fib_info *fi = nh->nh_parent;BUG_ON(!fi->fib_nhs);if (nh->fib_nh_dev != dev || fi == prev_fi)continue;prev_fi = fi;change_nexthops(fi) {if (!(nexthop_nh->fib_nh_flags & nh_flags)) {alive++;continue;}if (!nexthop_nh->fib_nh_dev || !(nexthop_nh->fib_nh_dev->flags & IFF_UP))continue;if (nexthop_nh->fib_nh_dev != dev || !__in_dev_get_rtnl(dev))continue;nexthop_nh->fib_nh_flags &= ~nh_flags;call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
如下call_fib_nh_notifiers函数,调用IPv4对应的通知链函数call_fib4_notifiers。如果接口设置了忽略链路down事件,并且下一跳设置了标志RTNH_F_LINKDOWN,前者可通过PROC文件:
/proc/sys/net/ipv4/conf/ens34/ignore_routes_with_linkdown
进行设置,不调用FIB通知链。另外,如果下一跳已经设置了RTNH_F_DEAD标志,也没有必要调用通知链。(以上针对FIB_EVENT_NH_ADD事件;对于FIB_EVENT_NH_DEL事件则相反,仅当满足上述两个条件之一时才调用通知链。)
static int call_fib_nh_notifiers(struct fib_nh *nh, enum fib_event_type event_type)
{bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);struct fib_nh_notifier_info info = {.fib_nh = nh,};switch (event_type) {case FIB_EVENT_NH_ADD:if (nh->fib_nh_flags & RTNH_F_DEAD)break;if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)break;return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info);case FIB_EVENT_NH_DEL:if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || (nh->fib_nh_flags & RTNH_F_DEAD))return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info);
FIB表项通知链调用
当用户层通过ip route命令或者route命令,以及内核通过fib_magic自行添加路由表项时,由函数fib_table_insert进行处理,最后由函数call_fib_entry_notifiers发送类型为FIB_EVENT_ENTRY_REPLACE的通知链事件。
int fib_table_insert(struct net *net, struct fib_table *tb,struct fib_config *cfg, struct netlink_ext_ack *extack)
{struct fib_alias *fa, *new_fa;new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);if (!new_fa) goto out;if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) {enum fib_event_type fib_event;fib_event = FIB_EVENT_ENTRY_REPLACE;err = call_fib_entry_notifiers(net, fib_event, key, plen,new_fa, extack);
同上,当要删除一条FIB表项时,也是由函数call_fib_entry_notifiers发送通知链事件。如果FIB表中有路由可替代被删除的路由,发送FIB_EVENT_ENTRY_REPLACE事件,否则发送FIB_EVENT_ENTRY_DEL事件。
int fib_table_delete(struct net *net, struct fib_table *tb, struct fib_config *cfg, struct netlink_ext_ack *extack)
{fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);static void fib_notify_alias_delete(struct net *net, u32 key, struct hlist_head *fah,struct fib_alias *fa_to_delete, struct netlink_ext_ack *extack)
{struct fib_alias *fa_next, *fa_to_notify;u32 tb_id = fa_to_delete->tb_id;u8 slen = fa_to_delete->fa_slen;.../* Determine if the route should be replaced by the next route in the list.*/fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,struct fib_alias, fa_list);if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {fib_event = FIB_EVENT_ENTRY_REPLACE;fa_to_notify = fa_next;} else {fib_event = FIB_EVENT_ENTRY_DEL;fa_to_notify = fa_to_delete;}call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen, fa_to_notify, extack);
下一跳和FIB通知链
当用户层修改下一跳结构时,如通过ip nexthop命令,由函数nexthop_replace_notify发送通知链事件。
static void nexthop_replace_notify(struct net *net, struct nexthop *nh, struct nl_info *info)
{struct nh_grp_entry *nhge;__nexthop_replace_notify(net, nh, info);list_for_each_entry(nhge, &nh->grp_list, nh_list)__nexthop_replace_notify(net, nhge->nh_parent, info);
如果使用此下一跳结构的FIB链表不为空,将此受影响的fib_info结构的成员nh_updated设置为true(后续将据此进行判断),由函数fib_info_notify_update处理更新。
static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, struct nl_info *info)
{if (!list_empty(&nh->fi_list)) {struct fib_info *fi;/* expectation is a few fib_info per nexthop and then* a lot of routes per fib_info. So mark the fib_info* and then walk the fib tables once*/list_for_each_entry(fi, &nh->fi_list, nh_list)fi->nh_updated = true;fib_info_notify_update(net, info);list_for_each_entry(fi, &nh->fi_list, nh_list)fi->nh_updated = false;
遍历命名空间中的路由表哈希数组(目前256个),找到哈希链表头,进一步遍历链表中的每个路由表,针对路由表调用__fib_info_notify_update处理。即此函数将遍历命名空间中的所有路由表。
void fib_info_notify_update(struct net *net, struct nl_info *info)
{unsigned int h;for (h = 0; h < FIB_TABLE_HASHSZ; h++) {struct hlist_head *head = &net->ipv4.fib_table_hash[h];struct fib_table *tb;hlist_for_each_entry_rcu(tb, head, tb_hlist,lockdep_rtnl_is_held())__fib_info_notify_update(net, tb, info);
接下来,遍历每个路由表trie结构的叶子节点,如果其对应的fib_info设置了下一跳更新nh_updated,调用通知链函数call_fib_entry_notifiers。注释中提到,以后此通知链将在nexthop模块中实现。
static void __fib_info_notify_update(struct net *net, struct fib_table *tb, struct nl_info *info)
{struct fib_alias *fa;for (;;) {hlist_for_each_entry(fa, &n->leaf, fa_list) {struct fib_info *fi = fa->fa_info;if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)continue;.../* call_fib_entry_notifiers will be removed when* in-kernel notifier is implemented and supported* for nexthop objects*/call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,n->key, KEYLENGTH - fa->fa_slen, fa, NULL);
FIB表查询
如下函数fib4_dump,其首先dump路由策略,即遍历命名空间中的路由策略,发送fib通知;其次,由函数fib_notify完成fib表项的dump处理。
static int fib4_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack)
{int err;err = fib4_rules_dump(net, nb, extack);if (err)return err;return fib_notify(net, nb, extack);
函数fib_notify遍历命名空间中所有的路由表,由函数fib_table_notify处理每个路由表的通知(dump处理)。
int fib_notify(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack)
{ unsigned int h;int err;for (h = 0; h < FIB_TABLE_HASHSZ; h++) {struct hlist_head *head = &net->ipv4.fib_table_hash[h];struct fib_table *tb;hlist_for_each_entry_rcu(tb, head, tb_hlist) {err = fib_table_notify(tb, nb, extack);if (err)return err;
遍历路由表trie结构的所有叶子节点,每个节点由fib_leaf_notify处理。
static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb, struct netlink_ext_ack *extack)
{ struct trie *t = (struct trie *)tb->tb_data;struct key_vector *l, *tp = t->kv;t_key key = 0;while ((l = leaf_walk_rcu(&tp, key)) != NULL) {err = fib_leaf_notify(l, tb, nb, extack);if (err) return err;key = l->key + 1;/* stop in case of wrap around */if (key < l->key)break;
如下具体的叶子节点处理函数,由于local和main路由表可以共享同一个trie结构,为避免同一个FIB表项进行重复通知,要求fa的路由表ID和当前的ID相同。最后是由函数call_fib_entry_notifier执行通知链。
static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,struct notifier_block *nb, struct netlink_ext_ack *extack)
{struct fib_alias *fa;int last_slen = -1;hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {struct fib_info *fi = fa->fa_info;if (!fi) continue;/* local and main table can share the same trie,* so don't notify twice for the same entry.*/if (tb->tb_id != fa->tb_id) continue;if (fa->fa_slen == last_slen) continue;last_slen = fa->fa_slen;err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,l->key, KEYLENGTH - fa->fa_slen, fa, extack);
FIB通知链注册与处理
函数register_fib_notifier负责FIB通知链的注册,以下可见,在注册新的通知链处理函数时,将执行fib_net_dump,其将调用上一节介绍的fib4_dump函数(对于IPv4协议),将所有的FIB通知发送一遍。
int register_fib_notifier(struct net *net, struct notifier_block *nb,void (*cb)(struct notifier_block *nb), struct netlink_ext_ack *extack)
{int retries = 0;int err;do {unsigned int fib_seq = fib_seq_sum(net);err = fib_net_dump(net, nb, extack);if (err)return err;if (fib_dump_is_consistent(net, nb, cb, fib_seq))return 0;
目前使用fib通知链的主要是网络驱动程序,如Mellanox的交换机驱动(mlxsw),其注册了处理函数mlxsw_sp_router_fib_event。
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,struct netlink_ext_ack *extack)
{...mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),&mlxsw_sp->router->fib_nb,mlxsw_sp_router_fib_dump_flush, extack);
如下函数mlxsw_sp_router_fib_event,处理FIB路由策略和FIB表项的相关事件。
static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{struct mlxsw_sp_fib_event_work *fib_work;struct fib_notifier_info *info = ptr;struct mlxsw_sp_router *router;if ((info->family != AF_INET && info->family != AF_INET6 &&info->family != RTNL_FAMILY_IPMR &&info->family != RTNL_FAMILY_IP6MR))return NOTIFY_DONE;router = container_of(nb, struct mlxsw_sp_router, fib_nb);switch (event) {case FIB_EVENT_RULE_ADD:case FIB_EVENT_RULE_DEL:err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp);return notifier_from_errno(err);case FIB_EVENT_ENTRY_ADD:case FIB_EVENT_ENTRY_REPLACE:case FIB_EVENT_ENTRY_APPEND:
内核版本 5.10
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
