cregit-Linux how code gets into the kernel

Release 4.11 net/ipv4/ip_sockglue.c

Directory: net/ipv4
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              The IP to API glue.
 *
 * Authors:     see ip.c
 *
 * Fixes:
 *              Many            :       Split from ip.c , see ip.c for history.
 *              Martin Mares    :       TOS setting fixed.
 *              Alan Cox        :       Fixed a couple of oopses in Martin's
 *                                      TOS tweaks.
 *              Mike McLagan    :       Routing by source
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/tcp_states.h>
#include <linux/udp.h>
#include <linux/igmp.h>
#include <linux/netfilter.h>
#include <linux/route.h>
#include <linux/mroute.h>
#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
#include <net/checksum.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/transp_v6.h>
#endif
#include <net/ip_fib.h>

#include <linux/errqueue.h>
#include <linux/uaccess.h>

/*
 *      SOL_IP control messages.
 */


static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { struct in_pktinfo info = *PKTINFO_SKB_CB(skb); info.ipi_addr.s_addr = ip_hdr(skb)->daddr; put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)4987.50%457.14%
Eric Dumazet47.14%228.57%
Arnaldo Carvalho de Melo35.36%114.29%
Total56100.00%7100.00%


static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) { int ttl = ip_hdr(skb)->ttl; put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)4093.02%480.00%
Arnaldo Carvalho de Melo36.98%120.00%
Total43100.00%5100.00%


static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) { put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)3291.43%375.00%
Arnaldo Carvalho de Melo38.57%125.00%
Total35100.00%4100.00%


static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) { if (IPCB(skb)->opt.optlen == 0) return; put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, ip_hdr(skb) + 1); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)5294.55%266.67%
Arnaldo Carvalho de Melo35.45%133.33%
Total55100.00%3100.00%


static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; if (IPCB(skb)->opt.optlen == 0) return; if (ip_options_echo(opt, skb)) { msg->msg_flags |= MSG_CTRUNC; return; } ip_options_undo(opt); put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)9498.95%266.67%
Adrian Bunk11.05%133.33%
Total95100.00%3100.00%


static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb) { int val; if (IPCB(skb)->frag_max_size == 0) return; val = IPCB(skb)->frag_max_size; put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val); }

Contributors

PersonTokensPropCommitsCommitProp
Willem de Bruijn57100.00%1100.00%
Total57100.00%1100.00%


static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset) { __wsum csum = skb->csum; if (skb->ip_summed != CHECKSUM_COMPLETE) return; if (offset != 0) { int tend_off = skb_transport_offset(skb) + tlen; csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0)); } put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); }

Contributors

PersonTokensPropCommitsCommitProp
Tom Herbert7279.12%125.00%
Paolo Abeni1516.48%125.00%
Eric Dumazet33.30%125.00%
Willem de Bruijn11.10%125.00%
Total91100.00%4100.00%


static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; u32 seclen, secid; int err; err = security_socket_getpeersec_dgram(NULL, skb, &secid); if (err) return; err = security_secid_to_secctx(secid, &secdata, &seclen); if (err) return; put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); security_release_secctx(secdata, seclen); }

Contributors

PersonTokensPropCommitsCommitProp
Catherine Zhang83100.00%2100.00%
Total83100.00%2100.00%


static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; const struct iphdr *iph = ip_hdr(skb); __be16 *ports = (__be16 *)skb_transport_header(skb); if (skb_transport_offset(skb) + 4 > (int)skb->len) return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ sin.sin_family = AF_INET; sin.sin_addr.s_addr = iph->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); }

Contributors

PersonTokensPropCommitsCommitProp
Balazs Scheidler11394.17%125.00%
Willem de Bruijn32.50%125.00%
Harvey Harrison32.50%125.00%
Eric Dumazet10.83%125.00%
Total120100.00%4100.00%


void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset) { struct inet_sock *inet = inet_sk(sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ if (flags & IP_CMSG_PKTINFO) { ip_cmsg_recv_pktinfo(msg, skb); flags &= ~IP_CMSG_PKTINFO; if (!flags) return; } if (flags & IP_CMSG_TTL) { ip_cmsg_recv_ttl(msg, skb); flags &= ~IP_CMSG_TTL; if (!flags) return; } if (flags & IP_CMSG_TOS) { ip_cmsg_recv_tos(msg, skb); flags &= ~IP_CMSG_TOS; if (!flags) return; } if (flags & IP_CMSG_RECVOPTS) { ip_cmsg_recv_opts(msg, skb); flags &= ~IP_CMSG_RECVOPTS; if (!flags) return; } if (flags & IP_CMSG_RETOPTS) { ip_cmsg_recv_retopts(msg, skb); flags &= ~IP_CMSG_RETOPTS; if (!flags) return; } if (flags & IP_CMSG_PASSSEC) { ip_cmsg_recv_security(msg, skb); flags &= ~IP_CMSG_PASSSEC; if (!flags) return; } if (flags & IP_CMSG_ORIGDSTADDR) { ip_cmsg_recv_dstaddr(msg, skb); flags &= ~IP_CMSG_ORIGDSTADDR; if (!flags) return; } if (flags & IP_CMSG_CHECKSUM) ip_cmsg_recv_checksum(msg, skb, tlen, offset); if (flags & IP_CMSG_RECVFRAGSIZE) ip_cmsg_recv_fragsize(msg, skb); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)10038.91%426.67%
Tom Herbert8733.85%320.00%
Catherine Zhang176.61%16.67%
Balazs Scheidler176.61%16.67%
Willem de Bruijn135.06%16.67%
David S. Miller114.28%16.67%
Eric Dumazet62.33%213.33%
Paolo Abeni51.95%16.67%
Arnaldo Carvalho de Melo10.39%16.67%
Total257100.00%15100.00%

EXPORT_SYMBOL(ip_cmsg_recv_offset);
int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) { int err, val; struct cmsghdr *cmsg; struct net *net = sock_net(sk); for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; #if IS_ENABLED(CONFIG_IPV6) if (allow_ipv6 && cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) { struct in6_pktinfo *src_info; if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info))) return -EINVAL; src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) return -EINVAL; ipc->oif = src_info->ipi6_ifindex; ipc->addr = src_info->ipi6_addr.s6_addr32[3]; continue; } #endif if (cmsg->cmsg_level == SOL_SOCKET) { err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc); if (err) return err; continue; } if (cmsg->cmsg_level != SOL_IP) continue; switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - sizeof(struct cmsghdr); /* Our caller is responsible for freeing ipc->opt */ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); if (err) return err; break; case IP_PKTINFO: { struct in_pktinfo *info; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) return -EINVAL; info = (struct in_pktinfo *)CMSG_DATA(cmsg); ipc->oif = info->ipi_ifindex; ipc->addr = info->ipi_spec_dst.s_addr; break; } case IP_TTL: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) return -EINVAL; val = *(int *)CMSG_DATA(cmsg); if (val < 1 || val > 255) return -EINVAL; ipc->ttl = val; break; case IP_TOS: if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) val = *(int *)CMSG_DATA(cmsg); else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8))) val = *(u8 *)CMSG_DATA(cmsg); else return -EINVAL; if (val < 0 || val > 255) return -EINVAL; ipc->tos = val; ipc->priority = rt_tos2priority(ipc->tos); break; default: return -EINVAL; } } return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)16935.28%738.89%
Francesco Fusco11624.22%15.56%
Hannes Frederic Sowa10121.09%15.56%
Eric Dumazet428.77%422.22%
Soheil Hassas Yeganeh398.14%15.56%
Denis V. Lunev61.25%211.11%
Gu Zheng30.63%15.56%
Herbert Xu30.63%15.56%
Total479100.00%18100.00%

/* Special input handler for packets caught by router alert option. They are selected only by protocol field, and then processed likely local ones; but only if someone wants them! Otherwise, router not running rsvpd will kill RSVP. It is user level problem, what it will make with them. I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), but receiver should be enough clever f.e. to forward mtrace requests, sent to multicast group to reach destination designated router. */ struct ip_ra_chain __rcu *ip_ra_chain; static DEFINE_SPINLOCK(ip_ra_lock);
static void ip_ra_destroy_rcu(struct rcu_head *head) { struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); sock_put(ra->saved_sk); kfree(ra); }

Contributors

PersonTokensPropCommitsCommitProp
Eric Dumazet38100.00%2100.00%
Total38100.00%2100.00%


int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) { struct ip_ra_chain *ra, *new_ra; struct ip_ra_chain __rcu **rap; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) return -EINVAL; new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; spin_lock_bh(&ip_ra_lock); for (rap = &ip_ra_chain; (ra = rcu_dereference_protected(*rap, lockdep_is_held(&ip_ra_lock))) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { spin_unlock_bh(&ip_ra_lock); kfree(new_ra); return -EADDRINUSE; } /* dont let ip_call_ra_chain() use sk again */ ra->sk = NULL; RCU_INIT_POINTER(*rap, ra->next); spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); /* * Delay sock_put(sk) and kfree(ra) after one rcu grace * period. This guarantee ip_call_ra_chain() dont need * to mess with socket refcounts. */ ra->saved_sk = sk; call_rcu(&ra->rcu, ip_ra_destroy_rcu); return 0; } } if (!new_ra) { spin_unlock_bh(&ip_ra_lock); return -ENOBUFS; } new_ra->sk = sk; new_ra->destructor = destructor; RCU_INIT_POINTER(new_ra->next, ra); rcu_assign_pointer(*rap, new_ra); sock_hold(sk); spin_unlock_bh(&ip_ra_lock); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)21179.62%538.46%
Eric Dumazet4918.49%538.46%
David S. Miller31.13%17.69%
Arnaldo Carvalho de Melo10.38%17.69%
Ian Morris10.38%17.69%
Total265100.00%13100.00%


void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { struct sock_exterr_skb *serr; skb = skb_clone(skb, GFP_ATOMIC); if (!skb) return; serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; serr->ee.ee_type = icmp_hdr(skb)->type; serr->ee.ee_code = icmp_hdr(skb)->code; serr->ee.ee_pad = 0; serr->ee.ee_info = info; serr->ee.ee_data = 0; serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) - skb_network_header(skb); serr->port = port; if (skb_pull(skb, payload - skb->data)) { skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb) == 0) return; } kfree_skb(skb); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)17085.86%116.67%
Arnaldo Carvalho de Melo2512.63%350.00%
Linus Torvalds21.01%116.67%
Al Viro10.51%116.67%
Total198100.00%6100.00%


void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; struct iphdr *iph; struct sk_buff *skb; if (!inet->recverr) return; skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC); if (!skb) return; skb_put(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->daddr = daddr; serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_type = 0; serr->ee.ee_code = 0; serr->ee.ee_pad = 0; serr->ee.ee_info = info; serr->ee.ee_data = 0; serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); serr->port = port; __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb)) kfree_skb(skb); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)18884.30%216.67%
Arnaldo Carvalho de Melo229.87%650.00%
David S. Miller104.48%18.33%
Al Viro20.90%216.67%
Linus Torvalds10.45%18.33%
Total223100.00%12100.00%

/* For some errors we have valid addr_offset even with zero payload and * zero port. Also, addr_offset should be supported if port is set. */
static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr) { return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; }

Contributors

PersonTokensPropCommitsCommitProp
Julian Anastasov33100.00%1100.00%
Total33100.00%1100.00%

/* IPv4 supports cmsg on all imcp errors and some timestamps * * Timestamp code paths do not initialize the fields expected by cmsg: * the PKTINFO fields in skb->cb[]. Fill those in here. */
static bool ipv4_datagram_support_cmsg(const struct sock *sk, struct sk_buff *skb, int ee_origin) { struct in_pktinfo *info; if (ee_origin == SO_EE_ORIGIN_ICMP) return true; if (ee_origin == SO_EE_ORIGIN_LOCAL) return false; /* Support IP_PKTINFO on tstamp packets if requested, to correlate * timestamp with egress dev. Not possible for packets without iif * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). */ info = PKTINFO_SKB_CB(skb); if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || !info->ipi_ifindex) return false; info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; return true; }

Contributors

PersonTokensPropCommitsCommitProp
Willem de Bruijn86100.00%3100.00%
Total86100.00%3100.00%

/* * Handle MSG_ERRQUEUE */
int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb; DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct { struct sock_extended_err ee; struct sockaddr_in offender; } errhdr; int err; int copied; WARN_ON_ONCE(sk->sk_family == AF_INET6); err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (!skb) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (unlikely(err)) { kfree_skb(skb); return err; } sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); if (sin && ipv4_datagram_support_addr(serr)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr); /* Now we could try to dump offended packet options */ msg->msg_flags |= MSG_ERRQUEUE; err = copied; consume_skb(skb); out: return err; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)25569.86%420.00%
Willem de Bruijn3910.68%630.00%
Linus Torvalds215.75%15.00%
Eric Dumazet143.84%15.00%
Hannes Frederic Sowa133.56%15.00%
Steffen Hurrle113.01%15.00%
Arnaldo Carvalho de Melo61.64%210.00%
Julian Anastasov30.82%15.00%
Ian Morris10.27%15.00%
David S. Miller10.27%15.00%
Al Viro10.27%15.00%
Total365100.00%20100.00%

/* * Socket option code for IP. This is the end of the line after any * TCP,UDP etc options on an IP socket. */
static bool setsockopt_needs_rtnl(int optname) { switch (optname) { case IP_ADD_MEMBERSHIP: case IP_ADD_SOURCE_MEMBERSHIP: case IP_BLOCK_SOURCE: case IP_DROP_MEMBERSHIP: case IP_DROP_SOURCE_MEMBERSHIP: case IP_MSFILTER: case IP_UNBLOCK_SOURCE: case MCAST_BLOCK_SOURCE: case MCAST_MSFILTER: case MCAST_JOIN_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_UNBLOCK_SOURCE: case IP_ROUTER_ALERT: return true; } return false; }

Contributors

PersonTokensPropCommitsCommitProp
Marcelo Ricardo Leitner6395.45%266.67%
Américo Wang34.55%133.33%
Total66100.00%3100.00%


static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); int val = 0, err; bool needs_rtnl = setsockopt_needs_rtnl(optname); switch (optname) { case IP_PKTINFO: case IP_RECVTTL: case IP_RECVOPTS: case IP_RECVTOS: case IP_RETOPTS: case IP_TOS: case IP_TTL: case IP_HDRINCL: case IP_MTU_DISCOVER: case IP_RECVERR: case IP_ROUTER_ALERT: case IP_FREEBIND: case IP_PASSSEC: case IP_TRANSPARENT: case IP_MINTTL: case IP_NODEFRAG: case IP_BIND_ADDRESS_NO_PORT: case IP_UNICAST_IF: case IP_MULTICAST_TTL: case IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: case IP_CHECKSUM: case IP_RECVFRAGSIZE: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; } else if (optlen >= sizeof(char)) { unsigned char ucval; if (get_user(ucval, (unsigned char __user *) optval)) return -EFAULT; val = (int) ucval; } } /* If optlen==0, it is equivalent to val == 0 */ if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); err = 0; if (needs_rtnl) rtnl_lock(); lock_sock(sk); switch (optname) { case IP_OPTIONS: { struct ip_options_rcu *old, *opt = NULL; if (optlen > 40) goto e_inval; err = ip_options_get_from_user(sock_net(sk), &opt, optval, optlen); if (err) break; old = rcu_dereference_protected(inet->inet_opt, lockdep_sock_is_held(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk