网络篇之socket

应用层的方法socket用于创建套接字,tcp,udp都是通过此方法创建的。此方法对应的系统调用是socket,下面是系统调用的代码。

/*
 * socket(2) system call entry point.
 *
 * Splits the user-supplied 'type' into the real socket type (the bits under
 * SOCK_TYPE_MASK) and the creation flags (everything else), creates the
 * socket with sock_create() and maps it to a file descriptor with
 * sock_map_fd().
 *
 * Returns the value of sock_map_fd() on success, -EINVAL when unknown flag
 * bits are set, or the negative error reported by sock_create().
 */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	struct socket *new_sock;
	int fd_flags;
	int err;

	/* Compile-time consistency checks on the SOCK_* constants. */
	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

	/* Everything outside SOCK_TYPE_MASK is a creation flag. */
	fd_flags = type & ~SOCK_TYPE_MASK;
	type &= SOCK_TYPE_MASK;

	/* Only SOCK_CLOEXEC and SOCK_NONBLOCK are accepted as flags. */
	if (fd_flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;

	/* Where the two constants differ, translate SOCK_NONBLOCK to O_NONBLOCK. */
	if (SOCK_NONBLOCK != O_NONBLOCK && (fd_flags & SOCK_NONBLOCK)) {
		fd_flags &= ~SOCK_NONBLOCK;
		fd_flags |= O_NONBLOCK;
	}

	err = sock_create(family, type, protocol, &new_sock);
	if (err < 0)
		return err;

	return sock_map_fd(new_sock, fd_flags & (O_CLOEXEC | O_NONBLOCK));
}

1.sock_create的处理逻辑

sock_create函数内部会创建struct socket结构体;随后socket系统调用通过sock_map_fd为它申请一个struct file,并保存到进程对象task_struct的成员files上。应用层socket()返回的文件描述符,就是该struct file在files中的索引。

sock_create实际调用的是__sock_create,下面来看下__sock_create

int __sock_create(struct net *net, int family, int type, int protocol,struct socket **res, int kern/*1*/)
{int err;struct socket *sock;const struct net_proto_family *pf;/**      Check protocol is in range*/if (family < 0 || family >= NPROTO)return -EAFNOSUPPORT;if (type < 0 || type >= SOCK_MAX)return -EINVAL;/* Compatibility.This uglymoron is moved from INET layer to here to avoiddeadlock in module load.*/if (family == PF_INET && type == SOCK_PACKET) {pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",current->comm);family = PF_PACKET;}err = security_socket_create(family, type, protocol, kern);if (err)return err;/**	Allocate the socket and allow the family to set things up. if*	the protocol is 0, the family is instructed to select an appropriate*	default.*/sock = sock_alloc();if (!sock) {net_warn_ratelimited("socket: no more sockets\n");return -ENFILE;	/* Not exactly a match, but its theclosest posix thing */}sock->type = type;#ifdef CONFIG_MODULES/* Attempt to load a protocol module if the find failed.** 12/09/1996 Marcin: But! this makes REALLY only sense, if the user* requested real, full-featured networking support upon configuration.* Otherwise module support will break!*/if (rcu_access_pointer(net_families[family]) == NULL)request_module("net-pf-%d", family);
#endifrcu_read_lock();pf = rcu_dereference(net_families[family]);err = -EAFNOSUPPORT;if (!pf)goto out_release;/** We will call the ->create function, that possibly is in a loadable* module, so we have to bump that loadable module refcnt first.*/if (!try_module_get(pf->owner))goto out_release;/* Now protected by module ref count */rcu_read_unlock();// inet_create中会创建struct sock,并赋给struct socket.skerr = pf->create(net, sock, protocol, kern); // => inet_createif (err < 0)goto out_module_put;/** Now to bump the refcnt of the [loadable] module that owns this* socket at sock_release time we decrement its refcnt.*/if (!try_module_get(sock->ops->owner))goto out_module_busy;/** Now that we're done with the ->create function, the [loadable]* module can have its refcnt decremented*/module_put(pf->owner);err = security_socket_post_create(sock, family, type, protocol, kern);if (err)goto out_sock_release;*res = sock;return 0;out_module_busy:err = -EAFNOSUPPORT;
out_module_put:sock->ops = NULL;module_put(pf->owner);
out_sock_release:sock_release(sock);return err;out_release:rcu_read_unlock();goto out_sock_release;
}

各数据结构之间的关系

各数据结构之间的关系,可参考上图。

sock_alloc内依次调用new_inode_pseudo -> alloc_inode。
在alloc_inode中调用 sb->s_op->alloc_inode(实际为sock_alloc_inode)创建socket_alloc对象。
sock_alloc返回的struct socket即为struct socket_alloc中的socket。

__sock_create中的pf->create实际为inet_create。

static int inet_create(struct net *net, struct socket *sock, int protocol,int kern)
{struct sock *sk;struct inet_protosw *answer;struct inet_sock *inet;struct proto *answer_prot;unsigned char answer_flags;int try_loading_module = 0;int err;if (protocol < 0 || protocol >= IPPROTO_MAX)return -EINVAL;sock->state = SS_UNCONNECTED;/* Look for the requested type/protocol pair. */
lookup_protocol:err = -ESOCKTNOSUPPORT;rcu_read_lock();list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {err = 0;/* Check the non-wild match. */if (protocol == answer->protocol) {if (protocol != IPPROTO_IP)break;} else {/* Check for the two wild cases. */if (IPPROTO_IP == protocol) {protocol = answer->protocol;break;}if (IPPROTO_IP == answer->protocol)break;}err = -EPROTONOSUPPORT;}... ...sock->ops = answer->ops;answer_prot = answer->prot;answer_flags = answer->flags;rcu_read_unlock();WARN_ON(!answer_prot->slab);err = -ENOBUFS;sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);if (!sk)goto out;err = 0;if (INET_PROTOSW_REUSE & answer_flags)sk->sk_reuse = SK_CAN_REUSE;inet = inet_sk(sk);inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;inet->nodefrag = 0;if (SOCK_RAW == sock->type) {inet->inet_num = protocol;if (IPPROTO_RAW == protocol)inet->hdrincl = 1;}if (net->ipv4.sysctl_ip_no_pmtu_disc)inet->pmtudisc = IP_PMTUDISC_DONT;elseinet->pmtudisc = IP_PMTUDISC_WANT;inet->inet_id = 0;sock_init_data(sock, sk);sk->sk_destruct	   = inet_sock_destruct;sk->sk_protocol	   = protocol;sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;inet->uc_ttl	= -1;inet->mc_loop	= 1;inet->mc_ttl	= 1;inet->mc_all	= 1;inet->mc_index	= 0;inet->mc_list	= NULL;inet->rcv_tos	= 0;sk_refcnt_debug_inc(sk);if (inet->inet_num) {/* It assumes that any protocol which allows* the user to assign a number at socket* creation time automatically* shares.*/inet->inet_sport = htons(inet->inet_num);/* Add to protocol hash chains. */err = sk->sk_prot->hash(sk);if (err) {sk_common_release(sk);goto out;}}if (sk->sk_prot->init) {err = sk->sk_prot->init(sk);if (err) {sk_common_release(sk);goto out;}}if (!kern) {err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);if (err) {sk_common_release(sk);goto out;}}
out:return err;
out_rcu_unlock:rcu_read_unlock();goto out;
}

根据协议类型,在inetsw数组中查找对应的协议处理方法,实际相当于遍历下面的数组:

static struct inet_protosw inetsw_array[] =
{{.type =       SOCK_STREAM,.protocol =   IPPROTO_TCP,.prot =       &tcp_prot,.ops =        &inet_stream_ops,.flags =      INET_PROTOSW_PERMANENT |INET_PROTOSW_ICSK,},{.type =       SOCK_DGRAM,.protocol =   IPPROTO_UDP,.prot =       &udp_prot,.ops =        &inet_dgram_ops,.flags =      INET_PROTOSW_PERMANENT,},{.type =       SOCK_DGRAM,.protocol =   IPPROTO_ICMP,.prot =       &ping_prot,.ops =        &inet_sockraw_ops,.flags =      INET_PROTOSW_REUSE,},{.type =       SOCK_RAW,.protocol =   IPPROTO_IP,	/* wild card */.prot =       &raw_prot,.ops =        &inet_sockraw_ops,.flags =      INET_PROTOSW_REUSE,}
};

我们这里主要介绍tcp的处理逻辑,下面的ops最终指向inet_stream_ops。

/* From inet_create(): install the ops table of the matched inetsw entry. */
sock->ops = answer->ops;

sk_alloc中调用sk_prot_alloc来创建struct sock对象,下面是sk_prot_alloc的代码:

/*
 * sk_prot_alloc - allocate the struct sock for a protocol.
 *
 * @prot:     protocol description; supplies the slab cache, object size
 *            and owning module
 * @priority: GFP allocation flags
 * @family:   address family, passed to the security hook
 *
 * Allocates from prot->slab when the protocol has one, otherwise falls back
 * to kmalloc() of prot->obj_size bytes. Then runs the security allocation
 * hook and takes a reference on the owning module.
 *
 * Returns the new sock, or NULL if the allocation, the security hook or the
 * module reference fails (the memory is released again in the latter cases).
 */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		/*
		 * __GFP_ZERO is masked off for the cache allocation; when the
		 * caller asked for it, clearing is done by
		 * sk_prot_clear_nulls() instead.
		 */
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO)
			sk_prot_clear_nulls(sk, prot->obj_size);
	} else {
		sk = kmalloc(prot->obj_size, priority);
	}

	if (sk != NULL) {
		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	/* Release through the same allocator that produced sk. */
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

sk_prot_alloc的参数prot,最终追溯到struct proto tcp_prot。

/*
 * Protocol description for TCP (excerpt). obj_size is the size of
 * struct tcp_sock, so every sock allocated for this protocol is large
 * enough to hold a full tcp_sock.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	/* ... further members elided in the article ... */
	.obj_size		= sizeof(struct tcp_sock),
	/* ... further members elided in the article ... */
};

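对TCP而言prot->slab不为空(inet_create中的WARN_ON(!answer_prot->slab)正是对此的检查),sk从该slab缓存中分配;只有slab为空时才会走kmalloc分支。无论哪条路径,申请的sk大小都是prot->obj_size。由上面的定义可以知道,obj_size为struct tcp_sock的大小。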
内核代码中经常在struct sock、struct inet_sock、struct tcp_sock之间转化。struct sock为struct inet_sock的第一个成员,struct inet_sock为struct inet_connection_sock的第一个成员,而struct inet_connection_sock又为struct tcp_sock的第一个成员(参考上面的图),因此它们的起始地址相同,这是这些类型的指针可以相互转化的原因所在。

inet_create中通过sock_init_data方法,将struct socket与struct sock关联了起来。

2.sock_map_fd的处理逻辑

/*
 * sock_map_fd - bind a socket to a new file descriptor.
 *
 * Reserves an unused descriptor, wraps the socket in a struct file via
 * sock_alloc_file() and installs that file at the descriptor.
 *
 * Returns the descriptor on success. If no descriptor is available the
 * socket is released and the negative error is returned; if the file
 * allocation fails the reserved descriptor is given back and the error
 * encoded in the returned pointer is returned.
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *filp;
	int fd;

	fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	filp = sock_alloc_file(sock, flags, NULL);
	if (unlikely(IS_ERR(filp))) {
		/* Hand the reserved descriptor back before reporting the error. */
		put_unused_fd(fd);
		return PTR_ERR(filp);
	}

	fd_install(fd, filp);
	return fd;
}

get_unused_fd_flags 在current->files中查找最小的未使用文件描述符。通常前三个(即0,1,2)已被标准输入、标准输出、标准错误占用,因此新的描述符一般从3开始分配。

sock_alloc_file 申请一个struct file对象,并与sock_map_fd的第一个参数进行关联。

/* Cross-link the two objects: socket -> file and file -> socket. */
sock->file = file;
file->private_data = sock;

上述struct file对象申请成功,调用fd_install将struct file对象保存到current->files的fd位置上,返回fd。
此fd即为socket系统调用的返回结果。


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部