cregit-Linux how code gets into the kernel

Release 4.8 net/ipv4/raw.c

Directory: net/ipv4
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *              RAW - implementation of IP "raw" sockets.
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 * Fixes:
 *              Alan Cox        :       verify_area() fixed up
 *              Alan Cox        :       ICMP error handling
 *              Alan Cox        :       EMSGSIZE if you send too big a packet
 *              Alan Cox        :       Now uses generic datagrams and shared
 *                                      skbuff library. No more peek crashes,
 *                                      no more backlogs
 *              Alan Cox        :       Checks sk->broadcast.
 *              Alan Cox        :       Uses skb_free_datagram/skb_copy_datagram
 *              Alan Cox        :       Raw passes ip options too
 *              Alan Cox        :       Setsocketopt added
 *              Alan Cox        :       Fixed error return for broadcasts
 *              Alan Cox        :       Removed wake_up calls
 *              Alan Cox        :       Use ttl/tos
 *              Alan Cox        :       Cleaned up old debugging
 *              Alan Cox        :       Use new kernel side addresses
 *      Arnt Gulbrandsen        :       Fixed MSG_DONTROUTE in raw sockets.
 *              Alan Cox        :       BSD style RAW socket demultiplexing.
 *              Alan Cox        :       Beginnings of mrouted support.
 *              Alan Cox        :       Added IP_HDRINCL option.
 *              Alan Cox        :       Skip broadcast check if BSDism set.
 *              David S. Miller :       New socket lookup architecture.
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.

#include <linux/types.h>
#include <linux/atomic.h>
#include <asm/byteorder.h>
#include <asm/current.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/sockios.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/mroute.h>
#include <linux/netdevice.h>
#include <linux/in_route.h>
#include <linux/route.h>
#include <linux/skbuff.h>
#include <linux/igmp.h>
#include <net/net_namespace.h>
#include <net/dst.h>
#include <net/sock.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/snmp.h>
#include <net/tcp_states.h>
#include <net/inet_common.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/uio.h>

struct raw_frag_vec {
struct msghdr *msg;
	union {
struct icmphdr icmph;
char c[1];
int hlen;

static struct raw_hashinfo raw_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),

int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; struct hlist_head *head; head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; write_lock_bh(&h->lock); sk_add_node(sk, head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock_bh(&h->lock); return 0; }


pavel emelianovpavel emelianov3539.33%428.57%
arnaldo carvalho de meloarnaldo carvalho de melo88.99%214.29%
eric dumazeteric dumazet44.49%214.29%
craig gallekcraig gallek44.49%17.14%
david s. millerdavid s. miller33.37%17.14%

void raw_unhash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; write_lock_bh(&h->lock); if (sk_del_node_init(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(&h->lock); }


pavel emelianovpavel emelianov5793.44%375.00%
eric dumazeteric dumazet46.56%125.00%

static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { sk_for_each_from(sk) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } sk = NULL; found: return sk; }


arnaldo carvalho de meloarnaldo carvalho de melo2017.09%321.43%
david s. millerdavid s. miller1311.11%17.14%
pavel emelianovpavel emelianov97.69%214.29%
hideaki yoshifujihideaki yoshifuji75.98%214.29%
eric dumazeteric dumazet54.27%17.14%
alexey dobriyanalexey dobriyan21.71%17.14%

/* * 0 - deliver * 1 - block */
static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { struct icmphdr _hdr; const struct icmphdr *hdr; hdr = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_hdr), &_hdr); if (!hdr) return 1; if (hdr->type < 32) { __u32 data = raw_sk(sk)->; return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ return 0; }


eric dumazeteric dumazet3334.02%116.67%
linus torvaldslinus torvalds1919.59%116.67%
ian prattian pratt1111.34%116.67%
david s. millerdavid s. miller22.06%116.67%
arnaldo carvalho de meloarnaldo carvalho de melo11.03%116.67%

/* IP input processing comes here for RAW socket delivery. * Caller owns SKB, so we must make clones. * * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; int delivered = 0; struct net *net; read_lock(&raw_v4_hashinfo.lock); head = &[hash]; if (hlist_empty(head)) goto out; net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); while (sk) { delivered = 1; if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && ip_mc_sf_allow(sk, iph->daddr, iph->saddr, skb->dev->ifindex)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) raw_rcv(sk, clone); } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); } out: read_unlock(&raw_v4_hashinfo.lock); return delivered; }


pavel emelianovpavel emelianov2511.21%323.08%
david s. millerdavid s. miller2410.76%17.69%
arnaldo carvalho de meloarnaldo carvalho de melo229.87%215.38%
quentin armitagequentin armitage219.42%17.69%
patrick mchardypatrick mchardy135.83%17.69%
hideaki yoshifujihideaki yoshifuji31.35%17.69%
eric dumazeteric dumazet10.45%17.69%

int raw_local_deliver(struct sk_buff *skb, int protocol) { int hash; struct sock *raw_sk; hash = protocol & (RAW_HTABLE_SIZE - 1); raw_sk = sk_head(&[hash]); /* If there maybe a raw socket we must check - if not we * don't care less */ if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash)) raw_sk = NULL; return raw_sk != NULL; }


pavel emelianovpavel emelianov71100.00%2100.00%

static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) { struct inet_sock *inet = inet_sk(sk); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; int err = 0; int harderr = 0; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) ipv4_sk_update_pmtu(skb, sk, info); else if (type == ICMP_REDIRECT) { ipv4_sk_redirect(skb, sk); return; } /* Report error on raw socket, if: 1. User requested ip_recverr. 2. Socket is connected (otherwise the error indication is useless without ip_recverr and error is hard. */ if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED) return; switch (type) { default: case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; case ICMP_SOURCE_QUENCH: return; case ICMP_PARAMETERPROB: err = EPROTO; harderr = 1; break; case ICMP_DEST_UNREACH: err = EHOSTUNREACH; if (code > NR_ICMP_UNREACH) break; err = icmp_err_convert[code].errno; harderr = icmp_err_convert[code].fatal; if (code == ICMP_FRAG_NEEDED) { harderr = inet->pmtudisc != IP_PMTUDISC_DONT; err = EMSGSIZE; } } if (inet->recverr) { const struct iphdr *iph = (const struct iphdr *)skb->data; u8 *payload = skb->data + (iph->ihl << 2); if (inet->hdrincl) payload = skb->data; ip_icmp_error(sk, skb, err, 0, info, payload); } if (inet->recverr || harderr) { sk->sk_err = err; sk->sk_error_report(sk); } }


linus torvaldslinus torvalds4716.49%15.00%
david s. millerdavid s. miller4716.49%315.00%
arnaldo carvalho de meloarnaldo carvalho de melo124.21%315.00%
duan jiongduan jiong31.05%15.00%
eric dumazeteric dumazet20.70%15.00%
pavel emelianovpavel emelianov10.35%15.00%

void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { int hash; struct sock *raw_sk; const struct iphdr *iph; struct net *net; hash = protocol & (RAW_HTABLE_SIZE - 1); read_lock(&raw_v4_hashinfo.lock); raw_sk = sk_head(&[hash]); if (raw_sk) { iph = (const struct iphdr *)skb->data; net = dev_net(skb->dev); while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr, iph->saddr, skb->dev->ifindex)) != NULL) { raw_err(raw_sk, skb, info); raw_sk = sk_next(raw_sk); iph = (const struct iphdr *)skb->data; } } read_unlock(&raw_v4_hashinfo.lock); }


pavel emelianovpavel emelianov15696.30%360.00%
hideaki yoshifujihideaki yoshifuji31.85%120.00%
eric dumazeteric dumazet31.85%120.00%

static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { /* Charge it to the socket. */ ipv4_pktinfo_prepare(sk, skb); if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } return NET_RX_SUCCESS; }


eric dumazeteric dumazet612.50%120.00%
shawn bohrershawn bohrer24.17%120.00%

int raw_rcv(struct sock *sk, struct sk_buff *skb) { if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); return NET_RX_DROP; } nf_reset(skb); skb_push(skb, skb->data - skb_network_header(skb)); raw_rcv_skb(sk, skb); return 0; }


alexey kuznetsovalexey kuznetsov2027.03%16.67%
wang chenwang chen810.81%16.67%
patrick mchardypatrick mchardy56.76%16.67%
linus torvaldslinus torvalds45.41%16.67%
arnaldo carvalho de meloarnaldo carvalho de melo34.05%16.67%
hideaki yoshifujihideaki yoshifuji11.35%16.67%
david s. millerdavid s. miller11.35%16.67%

static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, struct msghdr *msg, size_t length, struct rtable **rtp, unsigned int flags, const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct iphdr *iph; struct sk_buff *skb; unsigned int iphlen; int err; struct rtable *rt = *rtp; int hlen, tlen; if (length > rt->>mtu) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, rt->>mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; hlen = LL_RESERVED_SPACE(rt->; tlen = rt->>needed_tailroom; skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (!skb) goto error; skb_reserve(skb, hlen); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; skb_dst_set(skb, &rt->dst); *rtp = NULL; skb_reset_network_header(skb); iph = ip_hdr(skb); skb_put(skb, length); skb->ip_summed = CHECKSUM_NONE; sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb->transport_header = skb->network_header; err = -EFAULT; if (memcpy_from_msg(iph, msg, length)) goto error_free; iphlen = iph->ihl * 4; /* * We don't want to modify the ip header, but we do need to * be sure that it won't cause problems later along the network * stack. Specifically we want to make sure that iph->ihl is a * sane value. If ihl points beyond the length of the buffer passed * in, reject the frame as invalid */ err = -EINVAL; if (iphlen > length) goto error_free; if (iphlen >= sizeof(*iph)) { if (!iph->saddr) iph->saddr = fl4->saddr; iph->check = 0; iph->tot_len = htons(length); if (!iph->id) ip_select_ident(net, skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); skb->transport_header += iphlen; if (iph->protocol == IPPROTO_ICMP && length >= iphlen + sizeof(struct icmphdr)) icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); } err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) goto error; out: return 0; error_free: kfree_skb(skb); error: IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); if (err == -ENOBUFS && !inet->recverr) err = 0; return err; }


alexey kuznetsovalexey kuznetsov20839.62%36.82%
herbert xuherbert xu438.19%24.55%
eric dumazeteric dumazet356.67%49.09%
david l stevensdavid l stevens254.76%12.27%
neil hormanneil horman224.19%12.27%
ben cartwright-coxben cartwright-cox173.24%12.27%
arnaldo carvalho de meloarnaldo carvalho de melo152.86%511.36%
pavel emelianovpavel emelianov142.67%24.55%
willem de bruijnwillem de bruijn132.48%12.27%
david s. millerdavid s. miller112.10%24.55%
soheil hassas yeganehsoheil hassas yeganeh101.90%12.27%
laszlo attila tothlaszlo attila toth71.33%12.27%
al viroal viro50.95%24.55%
eric w. biedermaneric w. biederman30.57%24.55%
hannes frederic sowahannes frederic sowa20.38%12.27%
ian morrisian morris10.19%12.27%
jan engelhardtjan engelhardt10.19%12.27%
ansis attekaansis atteka10.19%12.27%
jesper juhljesper juhl10.19%12.27%
patrick mchardypatrick mchardy10.19%12.27%
linus torvaldslinus torvalds10.19%12.27%
hideaki yoshifujihideaki yoshifuji10.19%12.27%

static int raw_probe_proto_opt(struct raw_frag_vec *rfv, struct flowi4 *fl4) { int err; if (fl4->flowi4_proto != IPPROTO_ICMP) return 0; /* We only need the first two bytes. */ rfv->hlen = 2; err = memcpy_from_msg(rfv->hdr.c, rfv->msg, rfv->hlen); if (err) return err; fl4->fl4_icmp_type = rfv->hdr.icmph.type; fl4->fl4_icmp_code = rfv->hdr.icmph.code; return 0; }


herbert xuherbert xu4853.33%228.57%
masahide nakamuramasahide nakamura3033.33%114.29%
heiko carstensheiko carstens77.78%114.29%
david s. millerdavid s. miller33.33%228.57%
al viroal viro22.22%114.29%

static int raw_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct raw_frag_vec *rfv = from; if (offset < rfv->hlen) { int copy = min(rfv->hlen - offset, len); if (skb->ip_summed == CHECKSUM_PARTIAL) memcpy(to, rfv->hdr.c + offset, copy); else skb->csum = csum_block_add( skb->csum, csum_partial_copy_nocheck(rfv->hdr.c + offset, to, copy, 0), odd); odd = 0; offset += copy; to += copy; len -= copy; if (!len) return 0; } offset -= rfv->hlen; return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); }


herbert xuherbert xu16099.38%150.00%
al viroal viro10.62%150.00%

static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; int free = 0; __be32 daddr; __be32 saddr; u8 tos; int err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; err = -EMSGSIZE; if (len > 0xFFFF) goto out; /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n", __func__, current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; daddr = inet->inet_daddr; } ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; ipc.ttl = 0; ipc.tos = -1; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); goto out; } if (ipc.opt) free = 1; } saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) { struct ip_options_rcu *inet_opt; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { memcpy(&opt_copy, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen); ipc.opt = &opt_copy.opt; } rcu_read_unlock(); } if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ if (inet->hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) goto done; daddr = ipc.opt->opt.faddr; } } tos = get_rtconn_flags(&ipc, sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; if (ipv4_is_multicast(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; } else if (!ipc.oif) ipc.oif = inet->uc_index; flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); if (!saddr && ipc.oif) { err = l3mdev_get_saddr(net, ipc.oif, &fl4); if (err < 0) goto done; } if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; err = raw_probe_proto_opt(&rfv, &fl4); if (err) goto done; } security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto done; } err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags, &ipc.sockc); else { sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); if (!ipc.addr) ipc.addr = fl4.daddr; lock_sock(sk); err = ip_append_data(