cregit-Linux: how code gets into the kernel

Release 4.15, net/packet/af_packet.c (directory: net/packet)
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 *                                      The convention is that longer addresses
 *                                      will simply extend the hardware address
 *                                      byte arrays at the end of sockaddr_ll
 *                                      and packet_mreq.
 *              Johann Baudy    :       Added TX RING.
 *              Chetan Loke     :       Implemented TPACKET_V3 block abstraction
 *                                      layer.
 *                                      Copyright (C) 2011, <lokec@ccs.neu.edu>
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/percpu.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <linux/bpf.h>
#include <net/compat.h>

#include "internal.h"

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit in the reserved space (tunnel); others are silly
     (PPP).
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It very likely points to the ll
                 header.  PPP does this, which is wrong, because it
                 introduces asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
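
A minimal user-space sketch (not part of this file) of the convention above: the ll header handling is what distinguishes the two packet socket types, SOCK_RAW delivering frames with the link-level header visible and SOCK_DGRAM delivering the payload with the ll header pulled.

#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>

/* type is SOCK_RAW (ll header visible) or SOCK_DGRAM (ll header pulled) */
static int open_packet_socket(int type)
{
	return socket(AF_PACKET, type, htons(ETH_P_ALL));
}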

/* Private packet socket structures. */

/* identical to struct packet_mreq except it has
 * a longer address field.
 */

struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};


union tpacket_uhdr {
	struct tpacket_hdr  *h1;
	struct tpacket2_hdr *h2;
	struct tpacket3_hdr *h3;
	void *raw;
};

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);


#define V3_ALIGNMENT	(8)


#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))


#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))


#define BLOCK_STATUS(x)	((x)->hdr.bh1.block_status)

#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)

#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)

#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)

#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)

#define BLOCK_O2PRIV(x)	((x)->offset_to_priv)

#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))
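
Illustrative reading of the macros above (symbolic values, not taken from this file): for a TPACKET_V3 ring created with tp_sizeof_priv = 13, the block layout resolves as

	BLK_HDR_LEN       = ALIGN(sizeof(struct tpacket_block_desc), 8)
	BLK_PLUS_PRIV(13) = BLK_HDR_LEN + ALIGN(13, 8)	/* priv rounds up to 16 */

so BLOCK_O2PRIV(x) points just past the aligned block header, and the first frame begins at BLOCK_O2FP(x), which prb_open_block() later sets to BLK_PLUS_PRIV(tp_sizeof_priv).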

struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
			struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(struct timer_list *);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb);


struct packet_skb_cb {
	union {
		struct sockaddr_pkt pkt;
		union {
			/* Trick: alias skb original length with
			 * ll.sll_family and ll.protocol in order
			 * to save room.
			 */
			unsigned int origlen;
			struct sockaddr_ll ll;
		};
	} sa;
};


#define vio_le() virtio_legacy_is_little_endian()


#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))


#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))

#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))

#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))

#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
        ((x)->kactive_blk_num+1) : 0)

static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);


static int packet_direct_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *orig_skb = skb;
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;

	if (unlikely(!netif_running(dev) ||
		     !netif_carrier_ok(dev)))
		goto drop;

	skb = validate_xmit_skb_list(skb, dev);
	if (skb != orig_skb)
		goto drop;

	packet_pick_tx_queue(dev, skb);
	txq = skb_get_tx_queue(dev, skb);

	local_bh_disable();

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_xmit_frozen_or_drv_stopped(txq))
		ret = netdev_start_xmit(skb, dev, txq, false);
	HARD_TX_UNLOCK(dev, txq);

	local_bh_enable();

	if (!dev_xmit_complete(ret))
		kfree_skb(skb);

	return ret;
drop:
	atomic_long_inc(&dev->tx_dropped);
	kfree_skb_list(skb);
	return NET_XMIT_DROP;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 145 | 85.80% | 5 | 50.00%
Willem de Bruijn | 13 | 7.69% | 1 | 10.00%
Iván Briano | 7 | 4.14% | 1 | 10.00%
David S. Miller | 4 | 2.37% | 3 | 30.00%
Total | 169 | 100.00% | 10 | 100.00%
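
packet_direct_xmit() is only reached when a socket has opted out of the qdisc layer; po->xmit is switched to it by the PACKET_QDISC_BYPASS socket option, handled elsewhere in af_packet.c. A user-space sketch of enabling it:

	int one = 1;

	setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one));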


static struct net_device *packet_cached_dev_get(struct packet_sock *po)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = rcu_dereference(po->cached_dev);
	if (likely(dev))
		dev_hold(dev);
	rcu_read_unlock();

	return dev;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 48 | 100.00% | 1 | 100.00%
Total | 48 | 100.00% | 1 | 100.00%


static void packet_cached_dev_assign(struct packet_sock *po,
				     struct net_device *dev)
{
	rcu_assign_pointer(po->cached_dev, dev);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 25 | 100.00% | 1 | 100.00%
Total | 25 | 100.00% | 1 | 100.00%


static void packet_cached_dev_reset(struct packet_sock *po)
{
	RCU_INIT_POINTER(po->cached_dev, NULL);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 20 | 100.00% | 1 | 100.00%
Total | 20 | 100.00% | 1 | 100.00%


static bool packet_use_direct_xmit(const struct packet_sock *po)
{
	return po->xmit == packet_direct_xmit;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 19 | 100.00% | 1 | 100.00%
Total | 19 | 100.00% | 1 | 100.00%


static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
	return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 26 | 96.30% | 2 | 66.67%
Li Zhong | 1 | 3.70% | 1 | 33.33%
Total | 27 | 100.00% | 3 | 100.00%


static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	u16 queue_index;

	if (ops->ndo_select_queue) {
		queue_index = ops->ndo_select_queue(dev, skb, NULL,
						    __packet_pick_tx_queue);
		queue_index = netdev_cap_txqueue(dev, queue_index);
	} else {
		queue_index = __packet_pick_tx_queue(dev, skb);
	}

	skb_set_queue_mapping(skb, queue_index);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 80 | 100.00% | 1 | 100.00%
Total | 80 | 100.00% | 1 | 100.00%

/* register_prot_hook must be invoked with the po->bind_lock held,
 * or from a context in which asynchronous accesses to the packet
 * socket is not possible (packet_create()).
 */
static void register_prot_hook(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);

	if (!po->running) {
		if (po->fanout)
			__fanout_link(sk, po);
		else
			dev_add_pack(&po->prot_hook);

		sock_hold(sk);
		po->running = 1;
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 63 | 100.00% | 2 | 100.00%
Total | 63 | 100.00% | 2 | 100.00%

/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
 * held.  If the sync parameter is true, we will temporarily drop
 * the po->bind_lock and do a synchronize_net to make sure no
 * asynchronous packet processing paths still refer to the elements
 * of po->prot_hook.  If the sync parameter is false, it is the
 * callers responsibility to take care of this.
 */
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	po->running = 0;

	if (po->fanout)
		__fanout_unlink(sk, po);
	else
		__dev_remove_pack(&po->prot_hook);

	__sock_put(sk);

	if (sync) {
		spin_unlock(&po->bind_lock);
		synchronize_net();
		spin_lock(&po->bind_lock);
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 82 | 100.00% | 2 | 100.00%
Total | 82 | 100.00% | 2 | 100.00%


static void unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	if (po->running)
		__unregister_prot_hook(sk, sync);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 37 | 100.00% | 1 | 100.00%
Total | 37 | 100.00% | 1 | 100.00%


static inline struct page * __pure pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Changli Gao | 24 | 72.73% | 1 | 50.00%
Michael S. Tsirkin | 9 | 27.27% | 1 | 50.00%
Total | 33 | 100.00% | 2 | 100.00%


static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union tpacket_uhdr h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	case TPACKET_V3:
		h.h3->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
		break;
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
	}

	smp_wmb();
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Johann Baudy | 43 | 34.40% | 1 | 10.00%
Patrick McHardy | 39 | 31.20% | 1 | 10.00%
Sowmini Varadhan | 22 | 17.60% | 1 | 10.00%
Chetan Loke | 7 | 5.60% | 1 | 10.00%
Ulisses Alonso Camaró | 5 | 4.00% | 1 | 10.00%
Daniel Borkmann | 3 | 2.40% | 1 | 10.00%
Changli Gao | 2 | 1.60% | 1 | 10.00%
Linus Torvalds (pre-git) | 2 | 1.60% | 1 | 10.00%
Michael S. Tsirkin | 1 | 0.80% | 1 | 10.00%
Arnaldo Carvalho de Melo | 1 | 0.80% | 1 | 10.00%
Total | 125 | 100.00% | 10 | 100.00%


static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union tpacket_uhdr h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	case TPACKET_V3:
		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
		return h.h3->tp_status;
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
		return 0;
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Patrick McHardy | 42 | 35.29% | 1 | 16.67%
Johann Baudy | 35 | 29.41% | 1 | 16.67%
Sowmini Varadhan | 20 | 16.81% | 1 | 16.67%
Chetan Loke | 17 | 14.29% | 1 | 16.67%
Daniel Borkmann | 3 | 2.52% | 1 | 16.67%
Changli Gao | 2 | 1.68% | 1 | 16.67%
Total | 119 | 100.00% | 6 | 100.00%


static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
				   unsigned int flags)
{
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (shhwtstamps &&
	    (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
		return TP_STATUS_TS_RAW_HARDWARE;

	if (ktime_to_timespec_cond(skb->tstamp, ts))
		return TP_STATUS_TS_SOFTWARE;

	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 69 | 100.00% | 2 | 100.00%
Total | 69 | 100.00% | 2 | 100.00%
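
The status bits returned here end up in the frame's tp_status, so a ring consumer can tell the timestamp source apart. A hypothetical user-space helper (tpacket3_hdr chosen for illustration):

static int tpacket_ts_source(const struct tpacket3_hdr *hdr)
{
	if (hdr->tp_status & TP_STATUS_TS_RAW_HARDWARE)
		return 2;	/* NIC hardware clock */
	if (hdr->tp_status & TP_STATUS_TS_SOFTWARE)
		return 1;	/* kernel software timestamp */
	return 0;		/* no timestamp recorded */
}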


static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
				    struct sk_buff *skb)
{
	union tpacket_uhdr h;
	struct timespec ts;
	__u32 ts_status;

	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
		return 0;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_sec = ts.tv_sec;
		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
		break;
	case TPACKET_V2:
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		break;
	case TPACKET_V3:
		h.h3->tp_sec = ts.tv_sec;
		h.h3->tp_nsec = ts.tv_nsec;
		break;
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
	}

	/* one flush is safe, as both fields always lie on the same cacheline */
	flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
	smp_wmb();

	return ts_status;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 129 | 75.00% | 1 | 25.00%
Daniel Borkmann | 43 | 25.00% | 3 | 75.00%
Total | 172 | 100.00% | 4 | 100.00%


static void *packet_lookup_frame(struct packet_sock *po,
				 struct packet_ring_buffer *rb,
				 unsigned int position,
				 int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union tpacket_uhdr h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 89 | 96.74% | 1 | 50.00%
Daniel Borkmann | 3 | 3.26% | 1 | 50.00%
Total | 92 | 100.00% | 2 | 100.00%
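
A worked example of the index arithmetic above, with assumed numbers: if frame_size = 2048 and frames_per_block = 2, then frame position 5 maps to pg_vec_pos = 5 / 2 = 2 and frame_offset = 5 % 2 = 1, i.e. the frame lives 2048 bytes into the third pg_vec block.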


static void *packet_current_frame(struct packet_sock *po,
				  struct packet_ring_buffer *rb,
				  int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 34 | 100.00% | 1 | 100.00%
Total | 34 | 100.00% | 1 | 100.00%


static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	del_timer_sync(&pkc->retire_blk_timer);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 19 | 100.00% | 2 | 100.00%
Total | 19 | 100.00% | 2 | 100.00%


static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
					  struct sk_buff_head *rb_queue)
{
	struct tpacket_kbdq_core *pkc;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);

	spin_lock_bh(&rb_queue->lock);
	pkc->delete_blk_timer = 1;
	spin_unlock_bh(&rb_queue->lock);

	prb_del_retire_blk_timer(pkc);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 53 | 91.38% | 2 | 50.00%
Duan Jiong | 3 | 5.17% | 1 | 25.00%
Veaceslav Falico | 2 | 3.45% | 1 | 25.00%
Total | 58 | 100.00% | 4 | 100.00%


static void prb_setup_retire_blk_timer(struct packet_sock *po)
{
	struct tpacket_kbdq_core *pkc;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
		    0);
	pkc->retire_blk_timer.expires = jiffies;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 29 | 63.04% | 2 | 50.00%
Kees Cook | 14 | 30.43% | 1 | 25.00%
Duan Jiong | 3 | 6.52% | 1 | 25.00%
Total | 46 | 100.00% | 4 | 100.00%


static int prb_calc_retire_blk_tmo(struct packet_sock *po,
				   int blk_size_in_bytes)
{
	struct net_device *dev;
	unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
	struct ethtool_link_ksettings ecmd;
	int err;

	rtnl_lock();
	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
	if (unlikely(!dev)) {
		rtnl_unlock();
		return DEFAULT_PRB_RETIRE_TOV;
	}
	err = __ethtool_get_link_ksettings(dev, &ecmd);
	rtnl_unlock();
	if (!err) {
		/*
		 * If the link speed is so slow you don't really
		 * need to worry about perf anyways
		 */
		if (ecmd.base.speed < SPEED_1000 ||
		    ecmd.base.speed == SPEED_UNKNOWN) {
			return DEFAULT_PRB_RETIRE_TOV;
		} else {
			msec = 1;
			div = ecmd.base.speed / 1000;
		}
	}

	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);

	if (div)
		mbits /= div;

	tmo = mbits * msec;

	if (div)
		return tmo+1;

	return tmo;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 127 | 70.17% | 1 | 25.00%
Jiri Pirko | 25 | 13.81% | 1 | 25.00%
Parav Pandit | 15 | 8.29% | 1 | 25.00%
David Decotigny | 14 | 7.73% | 1 | 25.00%
Total | 181 | 100.00% | 4 | 100.00%
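
A worked example of the timeout derivation above, assuming a 1 MiB block on a link that ethtool reports as 1 Gbit/s:

	msec  = 1;
	div   = 1000 / 1000;			/* = 1 */
	mbits = (1048576 * 8) / (1024 * 1024);	/* = 8 */
	tmo   = mbits * msec;			/* = 8 */
	/* div != 0, so the function returns tmo + 1 = 9 ms,
	 * roughly the time to fill one block at line rate.
	 */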


static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
			    union tpacket_req_u *req_u)
{
	p1->feature_req_word = req_u->req3.tp_feature_req_word;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 26 | 100.00% | 2 | 100.00%
Total | 26 | 100.00% | 2 | 100.00%


static void init_prb_bdqc(struct packet_sock *po,
			struct packet_ring_buffer *rb,
			struct pgv *pg_vec,
			union tpacket_req_u *req_u)
{
	struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
	struct tpacket_block_desc *pbd;

	memset(p1, 0x0, sizeof(*p1));

	p1->knxt_seq_num = 1;
	p1->pkbdq = pg_vec;
	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
	p1->pkblk_start	= pg_vec[0].buffer;
	p1->kblk_size = req_u->req3.tp_block_size;
	p1->knum_blocks	= req_u->req3.tp_block_nr;
	p1->hdrlen = po->tp_hdrlen;
	p1->version = po->tp_version;
	p1->last_kactive_blk_num = 0;
	po->stats.stats3.tp_freeze_q_cnt = 0;
	if (req_u->req3.tp_retire_blk_tov)
		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
	else
		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
						req_u->req3.tp_block_size);
	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;

	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
	prb_init_ft_ops(p1, req_u);
	prb_setup_retire_blk_timer(po);
	prb_open_block(p1, pbd);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 213 | 91.81% | 2 | 40.00%
Eric Dumazet | 15 | 6.47% | 1 | 20.00%
Duan Jiong | 3 | 1.29% | 1 | 20.00%
Daniel Borkmann | 1 | 0.43% | 1 | 20.00%
Total | 232 | 100.00% | 5 | 100.00%

/* Do NOT update the last_blk_num first.
 * Assumes sk_buff_head lock is held.
 */
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	mod_timer(&pkc->retire_blk_timer,
		  jiffies + pkc->tov_in_jiffies);
	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 33 | 100.00% | 2 | 100.00%
Total | 33 | 100.00% | 2 | 100.00%

/*
 * Timer logic:
 * 1) We refresh the timer only when we open a block.
 *    By doing this we don't waste cycles refreshing the timer
 *    on packet-by-packet basis.
 *
 * With a 1MB block-size, on a 1Gbps line, it will take
 * i) ~8 ms to fill a block + ii) memcpy etc.
 * In this cut we are not accounting for the memcpy time.
 *
 * So, if the user sets the 'tmo' to 10ms then the timer
 * will never fire while the block is still getting filled
 * (which is what we want). However, the user could choose
 * to close a block early and that's fine.
 *
 * But when the timer does fire, we check whether or not to refresh it.
 * Since the tmo granularity is in msecs, it is not too expensive
 * to refresh the timer, lets say every '8' msecs.
 * Either the user can set the 'tmo' or we can derive it based on
 * a) line-speed and b) block-size.
 * prb_calc_retire_blk_tmo() calculates the tmo.
 */
static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
{
	struct packet_sock *po =
		from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	unsigned int frozen;
	struct tpacket_block_desc *pbd;

	spin_lock(&po->sk.sk_receive_queue.lock);

	frozen = prb_queue_frozen(pkc);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	if (unlikely(pkc->delete_blk_timer))
		goto out;

	/* We only need to plug the race when the block is partially filled.
	 * tpacket_rcv:
	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
	 *		copy_bits() is in progress ...
	 *		timer fires on other cpu:
	 *		we can't retire the current block because copy_bits
	 *		is in progress.
	 */
	if (BLOCK_NUM_PKTS(pbd)) {
		while (atomic_read(&pkc->blk_fill_in_prog)) {
			/* Waiting for skb_copy_bits to finish... */
			cpu_relax();
		}
	}

	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
		if (!frozen) {
			if (!BLOCK_NUM_PKTS(pbd)) {
				/* An empty block. Just refresh the timer. */
				goto refresh_timer;
			}
			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
			if (!prb_dispatch_next_block(pkc, po))
				goto refresh_timer;
			else
				goto out;
		} else {
			/* Case 1. Queue was frozen because user-space was
			 *	   lagging behind.
			 */
			if (prb_curr_blk_in_use(pbd)) {
				/*
				 * Ok, user-space is still behind.
				 * So just refresh the timer.
				 */
				goto refresh_timer;
			} else {
				/* Case 2. queue was frozen,user-space caught up,
				 * now the link went idle && the timer fired.
				 * We don't have a block to close.So we open this
				 * block and restart the timer.
				 * opening a block thaws the queue,restarts timer
				 * Thawing/timer-refresh is a side effect.
				 */
				prb_open_block(pkc, pbd);
				goto out;
			}
		}
	}

refresh_timer:
	_prb_refresh_rx_retire_blk_timer(pkc);

out:
	spin_unlock(&po->sk.sk_receive_queue.lock);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 193 | 85.40% | 2 | 40.00%
Kees Cook | 16 | 7.08% | 1 | 20.00%
Alexander Drozdov | 14 | 6.19% | 1 | 20.00%
Duan Jiong | 3 | 1.33% | 1 | 20.00%
Total | 226 | 100.00% | 5 | 100.00%


static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
			    struct tpacket_block_desc *pbd1, __u32 status)
{
	/* Flush everything minus the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	u8 *start, *end;

	start = (u8 *)pbd1;

	/* Skip the block header(we know header WILL fit in 4K) */
	start += PAGE_SIZE;

	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
	for (; start < end; start += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif

	/* Now update the block status. */

	BLOCK_STATUS(pbd1) = status;

	/* Flush the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	start = (u8 *)pbd1;
	flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 121 | 100.00% | 2 | 100.00%
Total | 121 | 100.00% | 2 | 100.00%

/*
 * Side effect:
 *
 * 1) flush the block
 * 2) Increment active_blk_num
 *
 * Note:We DONT refresh the timer on purpose.
 *	Because almost always the next block will be opened.
 */
static void prb_close_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1,
		struct packet_sock *po, unsigned int stat)
{
	__u32 status = TP_STATUS_USER | stat;

	struct tpacket3_hdr *last_pkt;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
	struct sock *sk = &po->sk;

	if (po->stats.stats3.tp_drops)
		status |= TP_STATUS_LOSING;

	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
	last_pkt->tp_next_offset = 0;

	/* Get the ts of the last pkt */
	if (BLOCK_NUM_PKTS(pbd1)) {
		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
	} else {
		/* Ok, we tmo'd - so get the current time.
		 *
		 * It shouldn't really happen as we don't close empty
		 * blocks. See prb_retire_rx_blk_timer_expired().
		 */
		struct timespec ts;
		getnstimeofday(&ts);
		h1->ts_last_pkt.ts_sec = ts.tv_sec;
		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
	}

	smp_wmb();

	/* Flush the block */
	prb_flush_block(pkc1, pbd1, status);

	sk->sk_data_ready(sk);

	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 163 | 89.07% | 2 | 40.00%
Dan Collins | 17 | 9.29% | 1 | 20.00%
Daniel Borkmann | 2 | 1.09% | 1 | 20.00%
Alexander Drozdov | 1 | 0.55% | 1 | 20.00%
Total | 183 | 100.00% | 5 | 100.00%


static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
{
	pkc->reset_pending_on_curr_blk = 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 17 | 100.00% | 2 | 100.00%
Total | 17 | 100.00% | 2 | 100.00%

/*
 * Side effect of opening a block:
 *
 * 1) prb_queue is thawed.
 * 2) retire_blk_timer is refreshed.
 *
 */
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
	struct tpacket_block_desc *pbd1)
{
	struct timespec ts;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;

	smp_rmb();

	/* We could have just memset this but we will lose the
	 * flexibility of making the priv area sticky
	 */

	BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
	BLOCK_NUM_PKTS(pbd1) = 0;
	BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);

	getnstimeofday(&ts);

	h1->ts_first_pkt.ts_sec = ts.tv_sec;
	h1->ts_first_pkt.ts_nsec = ts.tv_nsec;

	pkc1->pkblk_start = (char *)pbd1;
	pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);

	BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
	BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;

	pbd1->version = pkc1->version;
	pkc1->prev = pkc1->nxt_offset;
	pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;

	prb_thaw_queue(pkc1);
	_prb_refresh_rx_retire_blk_timer(pkc1);

	smp_wmb();
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 178 | 99.44% | 2 | 66.67%
Daniel Borkmann | 1 | 0.56% | 1 | 33.33%
Total | 179 | 100.00% | 3 | 100.00%

/*
 * Queue freeze logic:
 * 1) Assume tp_block_nr = 8 blocks.
 * 2) At time 't0', user opens Rx ring.
 * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
 * 4) user-space is either sleeping or processing block '0'.
 * 5) tpacket_rcv is currently filling block '7', since there is no space left,
 *    it will close block-7,loop around and try to fill block '0'.
 *    call-flow:
 *    __packet_lookup_frame_in_block
 *      prb_retire_current_block()
 *      prb_dispatch_next_block()
 *        |->(BLOCK_STATUS == USER) evaluates to true
 *    5.1) Since block-0 is currently in-use, we just freeze the queue.
 * 6) Now there are two cases:
 *    6.1) Link goes idle right after the queue is frozen.
 *         But remember, the last open_block() refreshed the timer.
 *         When this timer expires,it will refresh itself so that we can
 *         re-open block-0 in near future.
 *    6.2) Link is busy and keeps on receiving packets. This is a simple
 *         case and __packet_lookup_frame_in_block will check if block-0
 *         is free and can now be re-used.
 */
static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
				  struct packet_sock *po)
{
	pkc->reset_pending_on_curr_blk = 1;
	po->stats.stats3.tp_freeze_q_cnt++;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 30 | 96.77% | 2 | 66.67%
Daniel Borkmann | 1 | 3.23% | 1 | 33.33%
Total | 31 | 100.00% | 3 | 100.00%

#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))

/*
 * If the next block is free then we will dispatch it
 * and return a good offset.
 * Else, we will freeze the queue.
 * So, caller must check the return value.
 */
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po)
{
	struct tpacket_block_desc *pbd;

	smp_rmb();

	/* 1. Get current block num */
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* 2. If this block is currently in_use then freeze the queue */
	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
		prb_freeze_queue(pkc, po);
		return NULL;
	}

	/*
	 * 3.
	 * open this block and return the offset where the first packet
	 * needs to get stored.
	 */
	prb_open_block(pkc, pbd);
	return (void *)pkc->nxt_offset;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 72 | 100.00% | 2 | 100.00%
Total | 72 | 100.00% | 2 | 100.00%


static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po, unsigned int status)
{
	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* retire/close the current block */
	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
		/*
		 * Plug the case where copy_bits() is in progress on
		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
		 * have space to copy the pkt in the current block and
		 * called prb_retire_current_block()
		 *
		 * We don't need to worry about the TMO case because
		 * the timer-handler already handled this case.
		 */
		if (!(status & TP_STATUS_BLK_TMO)) {
			while (atomic_read(&pkc->blk_fill_in_prog)) {
				/* Waiting for skb_copy_bits to finish... */
				cpu_relax();
			}
		}
		prb_close_block(pkc, pbd, po, status);
		return;
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 85 | 100.00% | 2 | 100.00%
Total | 85 | 100.00% | 2 | 100.00%


static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
{
	return TP_STATUS_USER & BLOCK_STATUS(pbd);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 19 | 100.00% | 2 | 100.00%
Total | 19 | 100.00% | 2 | 100.00%


static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
{
	return pkc->reset_pending_on_curr_blk;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 16 | 100.00% | 2 | 100.00%
Total | 16 | 100.00% | 2 | 100.00%


static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);

	atomic_dec(&pkc->blk_fill_in_prog);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 29 | 100.00% | 2 | 100.00%
Total | 29 | 100.00% | 2 | 100.00%


static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 28 | 96.55% | 2 | 66.67%
Tom Herbert | 1 | 3.45% | 1 | 33.33%
Total | 29 | 100.00% | 3 | 100.00%


static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 24 | 100.00% | 2 | 100.00%
Total | 24 | 100.00% | 2 | 100.00%


static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	if (skb_vlan_tag_present(pkc->skb)) {
		ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
		ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
		ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
	} else {
		ppd->hv1.tp_vlan_tci = 0;
		ppd->hv1.tp_vlan_tpid = 0;
		ppd->tp_status = TP_STATUS_AVAILABLE;
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 58 | 65.91% | 2 | 40.00%
Atzm Watanabe | 25 | 28.41% | 1 | 20.00%
Daniel Borkmann | 3 | 3.41% | 1 | 20.00%
Jiri Pirko | 2 | 2.27% | 1 | 20.00%
Total | 88 | 100.00% | 5 | 100.00%


static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_padding = 0;
	prb_fill_vlan_info(pkc, ppd);

	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
		prb_fill_rxhash(pkc, ppd);
	else
		prb_clear_rxhash(pkc, ppd);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 46 | 85.19% | 2 | 66.67%
Atzm Watanabe | 8 | 14.81% | 1 | 33.33%
Total | 54 | 100.00% | 3 | 100.00%


static void prb_fill_curr_block(char *curr,
				struct tpacket_kbdq_core *pkc,
				struct tpacket_block_desc *pbd,
				unsigned int len)
{
	struct tpacket3_hdr *ppd;

	ppd  = (struct tpacket3_hdr *)curr;
	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
	pkc->prev = curr;
	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_NUM_PKTS(pbd) += 1;
	atomic_inc(&pkc->blk_fill_in_prog);
	prb_run_all_ft_ops(pkc, ppd);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 94 | 100.00% | 2 | 100.00%
Total | 94 | 100.00% | 2 | 100.00%

/* Assumes caller has the sk->rx_queue.lock */
static void *__packet_lookup_frame_in_block(struct packet_sock *po,
					    struct sk_buff *skb,
					    int status,
					    unsigned int len)
{
	struct tpacket_kbdq_core *pkc;
	struct tpacket_block_desc *pbd;
	char *curr, *end;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* Queue is frozen when user space is lagging behind */
	if (prb_queue_frozen(pkc)) {
		/*
		 * Check if that last block which caused the queue to freeze,
		 * is still in_use by user-space.
		 */
		if (prb_curr_blk_in_use(pbd)) {
			/* Can't record this packet */
			return NULL;
		} else {
			/*
			 * Ok, the block was released by user-space.
			 * Now let's open that block.
			 * opening a block also thaws the queue.
			 * Thawing is a side effect.
			 */
			prb_open_block(pkc, pbd);
		}
	}

	smp_mb();
	curr = pkc->nxt_offset;
	pkc->skb = skb;
	end = (char *)pbd + pkc->kblk_size;

	/* first try the current block */
	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/* Ok, close the current block */
	prb_retire_current_block(pkc, po, 0);

	/* Now, try to dispatch the next block */
	curr = (char *)prb_dispatch_next_block(pkc, po);
	if (curr) {
		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/*
	 * No free blocks are available.user_space hasn't caught up yet.
	 * Queue was just frozen and now this packet will get dropped.
	 */
	return NULL;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 211 | 100.00% | 2 | 100.00%
Total | 211 | 100.00% | 2 | 100.00%


static void *packet_current_rx_frame(struct packet_sock *po,
					    struct sk_buff *skb,
					    int status, unsigned int len)
{
	char *curr = NULL;

	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		curr = packet_lookup_frame(po, &po->rx_ring,
					po->rx_ring.head, status);
		return curr;
	case TPACKET_V3:
		return __packet_lookup_frame_in_block(po, skb, status, len);
	default:
		WARN(1, "TPACKET version not supported\n");
		BUG();
		return NULL;
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 85 | 88.54% | 1 | 33.33%
Johann Baudy | 10 | 10.42% | 1 | 33.33%
Ying Xue | 1 | 1.04% | 1 | 33.33%
Total | 96 | 100.00% | 3 | 100.00%


static void *prb_lookup_block(struct packet_sock *po,
			      struct packet_ring_buffer *rb,
			      unsigned int idx,
			      int status)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);

	if (status != BLOCK_STATUS(pbd))
		return NULL;
	return pbd;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 32 | 52.46% | 2 | 40.00%
Johann Baudy | 26 | 42.62% | 1 | 20.00%
Willem de Bruijn | 2 | 3.28% | 1 | 20.00%
Patrick McHardy | 1 | 1.64% | 1 | 20.00%
Total | 61 | 100.00% | 5 | 100.00%


static int prb_previous_blk_num(struct packet_ring_buffer *rb)
{
	unsigned int prev;

	if (rb->prb_bdqc.kactive_blk_num)
		prev = rb->prb_bdqc.kactive_blk_num-1;
	else
		prev = rb->prb_bdqc.knum_blocks-1;
	return prev;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 34 | 72.34% | 1 | 33.33%
Johann Baudy | 12 | 25.53% | 1 | 33.33%
Neil Horman | 1 | 2.13% | 1 | 33.33%
Total | 47 | 100.00% | 3 | 100.00%

/* Assumes caller has held the rx_queue.lock */
static void *__prb_previous_block(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	unsigned int previous = prb_previous_blk_num(rb);

	return prb_lookup_block(po, rb, previous, status);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 34 | 82.93% | 1 | 50.00%
Johann Baudy | 7 | 17.07% | 1 | 50.00%
Total | 41 | 100.00% | 2 | 100.00%


static void *packet_previous_rx_frame(struct packet_sock *po,
					     struct packet_ring_buffer *rb,
					     int status)
{
	if (po->tp_version <= TPACKET_V2)
		return packet_previous_frame(po, rb, status);

	return __prb_previous_block(po, rb, status);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Johann Baudy | 30 | 62.50% | 1 | 50.00%
Chetan Loke | 18 | 37.50% | 1 | 50.00%
Total | 48 | 100.00% | 2 | 100.00%


static void packet_increment_rx_head(struct packet_sock *po,
					    struct packet_ring_buffer *rb)
{
	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		return packet_increment_head(rb);
	case TPACKET_V3:
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
		return;
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Chetan Loke | 51 | 100.00% | 1 | 100.00%
Total | 51 | 100.00% | 1 | 100.00%


static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;

	return packet_lookup_frame(po, rb, previous, status);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Johann Baudy | 41 | 82.00% | 1 | 50.00%
David S. Miller | 9 | 18.00% | 1 | 50.00%
Total | 50 | 100.00% | 2 | 100.00%


static void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 31 | 100.00% | 1 | 100.00%
Total | 31 | 100.00% | 1 | 100.00%


static void packet_inc_pending(struct packet_ring_buffer *rb)
{
	this_cpu_inc(*rb->pending_refcnt);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 19 | 100.00% | 1 | 100.00%
Total | 19 | 100.00% | 1 | 100.00%


static void packet_dec_pending(struct packet_ring_buffer *rb)
{
	this_cpu_dec(*rb->pending_refcnt);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 19 | 100.00% | 1 | 100.00%
Total | 19 | 100.00% | 1 | 100.00%


static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
{
	unsigned int refcnt = 0;
	int cpu;

	/* We don't use pending refcount in rx_ring. */
	if (rb->pending_refcnt == NULL)
		return 0;

	for_each_possible_cpu(cpu)
		refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);

	return refcnt;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 53 | 100.00% | 1 | 100.00%
Total | 53 | 100.00% | 1 | 100.00%


static int packet_alloc_pending(struct packet_sock *po)
{
	po->rx_ring.pending_refcnt = NULL;
	po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);

	if (unlikely(po->tx_ring.pending_refcnt == NULL))
		return -ENOBUFS;

	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 50 | 100.00% | 1 | 100.00%
Total | 50 | 100.00% | 1 | 100.00%


static void packet_free_pending(struct packet_sock *po)
{
	free_percpu(po->tx_ring.pending_refcnt);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 20 | 100.00% | 1 | 100.00%
Total | 20 | 100.00% | 1 | 100.00%

#define ROOM_POW_OFF	2
#define ROOM_NONE	0x0
#define ROOM_LOW	0x1
#define ROOM_NORMAL	0x2
static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
{
	int idx, len;

	len = po->rx_ring.frame_max + 1;
	idx = po->rx_ring.head;
	if (pow_off)
		idx += len >> pow_off;
	if (idx >= len)
		idx -= len;
	return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 72 | 100.00% | 2 | 100.00%
Total | 72 | 100.00% | 2 | 100.00%
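
Reading the probe above with ROOM_POW_OFF == 2: the frame tested is len >> 2 (a quarter of the ring) ahead of head, so a ROOM_NORMAL verdict means roughly 25% of the ring is still free for the kernel, while pow_off == 0 only checks that the very next frame is free (the ROOM_LOW case in __packet_rcv_has_room() below).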


static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
{
	int idx, len;

	len = po->rx_ring.prb_bdqc.knum_blocks;
	idx = po->rx_ring.prb_bdqc.kactive_blk_num;
	if (pow_off)
		idx += len >> pow_off;
	if (idx >= len)
		idx -= len;
	return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 74 | 100.00% | 1 | 100.00%
Total | 74 | 100.00% | 1 | 100.00%


static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
	struct sock *sk = &po->sk;
	int ret = ROOM_NONE;

	if (po->prot_hook.func != tpacket_rcv) {
		int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
					  - (skb ? skb->truesize : 0);
		if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
			return ROOM_NORMAL;
		else if (avail > 0)
			return ROOM_LOW;
		else
			return ROOM_NONE;
	}

	if (po->tp_version == TPACKET_V3) {
		if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
			ret = ROOM_NORMAL;
		else if (__tpacket_v3_has_room(po, 0))
			ret = ROOM_LOW;
	} else {
		if (__tpacket_has_room(po, ROOM_POW_OFF))
			ret = ROOM_NORMAL;
		else if (__tpacket_has_room(po, 0))
			ret = ROOM_LOW;
	}

	return ret;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 167 | 100.00% | 3 | 100.00%
Total | 167 | 100.00% | 3 | 100.00%


static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
	int ret;
	bool has_room;

	spin_lock_bh(&po->sk.sk_receive_queue.lock);
	ret = __packet_rcv_has_room(po, skb);
	has_room = ret == ROOM_NORMAL;
	if (po->pressure == has_room)
		po->pressure = !has_room;
	spin_unlock_bh(&po->sk.sk_receive_queue.lock);

	return ret;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 79 | 100.00% | 4 | 100.00%
Total | 79 | 100.00% | 4 | 100.00%


static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(refcount_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 65 | 98.48% | 1 | 50.00%
Elena Reshetova | 1 | 1.52% | 1 | 50.00%
Total | 66 | 100.00% | 2 | 100.00%


static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
{
	u32 rxhash;
	int i, count = 0;

	rxhash = skb_get_hash(skb);
	for (i = 0; i < ROLLOVER_HLEN; i++)
		if (po->rollover->history[i] == rxhash)
			count++;

	po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash;
	return count > (ROLLOVER_HLEN >> 1);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 85 | 100.00% | 1 | 100.00%
Total | 85 | 100.00% | 1 | 100.00%


static unsigned int fanout_demux_hash(struct packet_fanout *f,
				      struct sk_buff *skb,
				      unsigned int num)
{
	return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 22 | 68.75% | 2 | 33.33%
Willem de Bruijn | 4 | 12.50% | 1 | 16.67%
Daniel Borkmann | 4 | 12.50% | 2 | 33.33%
Tom Herbert | 2 | 6.25% | 1 | 16.67%
Total | 32 | 100.00% | 6 | 100.00%


static unsigned int fanout_demux_lb(struct packet_fanout *f,
				    struct sk_buff *skb,
				    unsigned int num)
{
	unsigned int val = atomic_inc_return(&f->rr_cur);

	return val % num;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 28 | 73.68% | 1 | 33.33%
Willem de Bruijn | 10 | 26.32% | 2 | 66.67%
Total | 38 | 100.00% | 3 | 100.00%


static unsigned int fanout_demux_cpu(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	return smp_processor_id() % num;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 24 | 88.89% | 1 | 50.00%
David S. Miller | 3 | 11.11% | 1 | 50.00%
Total | 27 | 100.00% | 2 | 100.00%


static unsigned int fanout_demux_rnd(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	return prandom_u32_max(num);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 27 | 100.00% | 2 | 100.00%
Total | 27 | 100.00% | 2 | 100.00%


static unsigned int fanout_demux_rollover(struct packet_fanout *f,
					  struct sk_buff *skb,
					  unsigned int idx, bool try_self,
					  unsigned int num)
{
	struct packet_sock *po, *po_next, *po_skip = NULL;
	unsigned int i, j, room = ROOM_NONE;

	po = pkt_sk(f->arr[idx]);

	if (try_self) {
		room = packet_rcv_has_room(po, skb);
		if (room == ROOM_NORMAL ||
		    (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
			return idx;
		po_skip = po;
	}

	i = j = min_t(int, po->rollover->sock, num - 1);
	do {
		po_next = pkt_sk(f->arr[i]);
		if (po_next != po_skip && !po_next->pressure &&
		    packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
			if (i != j)
				po->rollover->sock = i;
			atomic_long_inc(&po->rollover->num);
			if (room == ROOM_LOW)
				atomic_long_inc(&po->rollover->num_huge);
			return i;
		}

		if (++i == num)
			i = 0;
	} while (i != j);

	atomic_long_inc(&po->rollover->num_failed);
	return idx;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 214 | 91.06% | 8 | 88.89%
David S. Miller | 21 | 8.94% | 1 | 11.11%
Total | 235 | 100.00% | 9 | 100.00%


static unsigned int fanout_demux_qm(struct packet_fanout *f,
				    struct sk_buff *skb,
				    unsigned int num)
{
	return skb_get_queue_mapping(skb) % num;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Neil Horman | 29 | 100.00% | 1 | 100.00%
Total | 29 | 100.00% | 1 | 100.00%


static unsigned int fanout_demux_bpf(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	struct bpf_prog *prog;
	unsigned int ret = 0;

	rcu_read_lock();
	prog = rcu_dereference(f->bpf_prog);
	if (prog)
		ret = bpf_prog_run_clear_cb(prog, skb) % num;
	rcu_read_unlock();

	return ret;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 64 | 98.46% | 1 | 50.00%
Alexei Starovoitov | 1 | 1.54% | 1 | 50.00%
Total | 65 | 100.00% | 2 | 100.00%


static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
{
	return f->flags & (flag >> 8);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 23 | 92.00% | 1 | 50.00%
David S. Miller | 2 | 8.00% | 1 | 50.00%
Total | 25 | 100.00% | 2 | 100.00%


static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
			     struct packet_type *pt, struct net_device *orig_dev)
{
	struct packet_fanout *f = pt->af_packet_priv;
	unsigned int num = READ_ONCE(f->num_members);
	struct net *net = read_pnet(&f->net);
	struct packet_sock *po;
	unsigned int idx;

	if (!net_eq(dev_net(dev), net) || !num) {
		kfree_skb(skb);
		return 0;
	}

	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
		skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
		if (!skb)
			return 0;
	}
	switch (f->type) {
	case PACKET_FANOUT_HASH:
	default:
		idx = fanout_demux_hash(f, skb, num);
		break;
	case PACKET_FANOUT_LB:
		idx = fanout_demux_lb(f, skb, num);
		break;
	case PACKET_FANOUT_CPU:
		idx = fanout_demux_cpu(f, skb, num);
		break;
	case PACKET_FANOUT_RND:
		idx = fanout_demux_rnd(f, skb, num);
		break;
	case PACKET_FANOUT_QM:
		idx = fanout_demux_qm(f, skb, num);
		break;
	case PACKET_FANOUT_ROLLOVER:
		idx = fanout_demux_rollover(f, skb, 0, false, num);
		break;
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		idx = fanout_demux_bpf(f, skb, num);
		break;
	}

	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
		idx = fanout_demux_rollover(f, skb, idx, true, num);

	po = pkt_sk(f->arr[idx]);
	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 154 | 51.33% | 3 | 23.08%
Willem de Bruijn | 84 | 28.00% | 4 | 30.77%
Daniel Borkmann | 15 | 5.00% | 1 | 7.69%
Neil Horman | 15 | 5.00% | 1 | 7.69%
Eric W. Biedermann | 15 | 5.00% | 1 | 7.69%
Alexander Drozdov | 11 | 3.67% | 1 | 7.69%
Eric Dumazet | 6 | 2.00% | 2 | 15.38%
Total | 300 | 100.00% | 13 | 100.00%

DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);
static u16 fanout_next_id;
static void __fanout_link(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;

	spin_lock(&f->lock);
	f->arr[f->num_members] = sk;
	smp_wmb();
	f->num_members++;
	if (f->num_members == 1)
		dev_add_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 60 | 78.95% | 1 | 50.00%
Anoob Soman | 16 | 21.05% | 1 | 50.00%
Total | 76 | 100.00% | 2 | 100.00%


static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;
	int i;

	spin_lock(&f->lock);
	for (i = 0; i < f->num_members; i++) {
		if (f->arr[i] == sk)
			break;
	}
	BUG_ON(i >= f->num_members);
	f->arr[i] = f->arr[f->num_members - 1];
	f->num_members--;
	if (f->num_members == 0)
		__dev_remove_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 105 | 86.78% | 1 | 50.00%
Anoob Soman | 16 | 13.22% | 1 | 50.00%
Total | 121 | 100.00% | 2 | 100.00%


static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
{
	if (sk->sk_family != PF_PACKET)
		return false;

	return ptype->af_packet_priv == pkt_sk(sk)->fanout;
}

Contributors

PersonTokensPropCommitsCommitProp
Eric Leblond2564.10%133.33%
Eric Dumazet1333.33%133.33%
Fengguang Wu12.56%133.33%
Total39100.00%3100.00%


static void fanout_init_data(struct packet_fanout *f)
{
	switch (f->type) {
	case PACKET_FANOUT_LB:
		atomic_set(&f->rr_cur, 0);
		break;
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		RCU_INIT_POINTER(f->bpf_prog, NULL);
		break;
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 49 | 100.00% | 2 | 100.00%
Total | 49 | 100.00% | 2 | 100.00%


static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
{
	struct bpf_prog *old;

	spin_lock(&f->lock);
	old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
	rcu_assign_pointer(f->bpf_prog, new);
	spin_unlock(&f->lock);

	if (old) {
		synchronize_net();
		bpf_prog_destroy(old);
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 77 | 100.00% | 1 | 100.00%
Total | 77 | 100.00% | 1 | 100.00%


static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
				unsigned int len)
{
	struct bpf_prog *new;
	struct sock_fprog fprog;
	int ret;

	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
		return -EPERM;
	if (len != sizeof(fprog))
		return -EINVAL;
	if (copy_from_user(&fprog, data, len))
		return -EFAULT;

	ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
	if (ret)
		return ret;

	__fanout_set_data_bpf(po->fanout, new);
	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 109 | 98.20% | 1 | 50.00%
Daniel Borkmann | 2 | 1.80% | 1 | 50.00%
Total | 111 | 100.00% | 2 | 100.00%


static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
				unsigned int len)
{
	struct bpf_prog *new;
	u32 fd;

	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
		return -EPERM;
	if (len != sizeof(fd))
		return -EINVAL;
	if (copy_from_user(&fd, data, len))
		return -EFAULT;

	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
	if (IS_ERR(new))
		return PTR_ERR(new);

	__fanout_set_data_bpf(po->fanout, new);
	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 104 | 97.20% | 1 | 50.00%
Daniel Borkmann | 3 | 2.80% | 1 | 50.00%
Total | 107 | 100.00% | 2 | 100.00%


static int fanout_set_data(struct packet_sock *po, char __user *data,
			   unsigned int len)
{
	switch (po->fanout->type) {
	case PACKET_FANOUT_CBPF:
		return fanout_set_data_cbpf(po, data, len);
	case PACKET_FANOUT_EBPF:
		return fanout_set_data_ebpf(po, data, len);
	default:
		return -EINVAL;
	};
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 62 | 100.00% | 2 | 100.00%
Total | 62 | 100.00% | 2 | 100.00%


static void fanout_release_data(struct packet_fanout *f)
{
	switch (f->type) {
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		__fanout_set_data_bpf(f, NULL);
	};
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 33 | 100.00% | 2 | 100.00%
Total | 33 | 100.00% | 2 | 100.00%


static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
{
	struct packet_fanout *f;

	list_for_each_entry(f, &fanout_list, list) {
		if (f->id == candidate_id &&
		    read_pnet(&f->net) == sock_net(sk)) {
			return false;
		}
	}

	return true;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Mike Maloney | 58 | 100.00% | 1 | 100.00%
Total | 58 | 100.00% | 1 | 100.00%


static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
{
	u16 id = fanout_next_id;

	do {
		if (__fanout_id_is_free(sk, id)) {
			*new_id = id;
			fanout_next_id = id + 1;
			return true;
		}

		id++;
	} while (id != fanout_next_id);

	return false;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Mike Maloney | 61 | 100.00% | 1 | 100.00%
Total | 61 | 100.00% | 1 | 100.00%


static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
	struct packet_rollover *rollover = NULL;
	struct packet_sock *po = pkt_sk(sk);
	struct packet_fanout *f, *match;
	u8 type = type_flags & 0xff;
	u8 flags = type_flags >> 8;
	int err;

	switch (type) {
	case PACKET_FANOUT_ROLLOVER:
		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
			return -EINVAL;
	case PACKET_FANOUT_HASH:
	case PACKET_FANOUT_LB:
	case PACKET_FANOUT_CPU:
	case PACKET_FANOUT_RND:
	case PACKET_FANOUT_QM:
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		break;
	default:
		return -EINVAL;
	}

	mutex_lock(&fanout_mutex);

	err = -EALREADY;
	if (po->fanout)
		goto out;

	if (type == PACKET_FANOUT_ROLLOVER ||
	    (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
		err = -ENOMEM;
		rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
		if (!rollover)
			goto out;
		atomic_long_set(&rollover->num, 0);
		atomic_long_set(&rollover->num_huge, 0);
		atomic_long_set(&rollover->num_failed, 0);
	}

	if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
		if (id != 0) {
			err = -EINVAL;
			goto out;
		}
		if (!fanout_find_new_id(sk, &id)) {
			err = -ENOMEM;
			goto out;
		}
		/* ephemeral flag for the first socket in the group: drop it */
		flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
	}

	match = NULL;
	list_for_each_entry(f, &fanout_list, list) {
		if (f->id == id &&
		    read_pnet(&f->net) == sock_net(sk)) {
			match = f;
			break;
		}
	}
	err = -EINVAL;
	if (match && match->flags != flags)
		goto out;
	if (!match) {
		err = -ENOMEM;
		match = kzalloc(sizeof(*match), GFP_KERNEL);
		if (!match)
			goto out;
		write_pnet(&match->net, sock_net(sk));
		match->id = id;
		match->type = type;
		match->flags = flags;
		INIT_LIST_HEAD(&match->list);
		spin_lock_init(&match->lock);
		refcount_set(&match->sk_ref, 0);
		fanout_init_data(match);
		match->prot_hook.type = po->prot_hook.type;
		match->prot_hook.dev = po->prot_hook.dev;
		match->prot_hook.func = packet_rcv_fanout;
		match->prot_hook.af_packet_priv = match;
		match->prot_hook.id_match = match_fanout_group;
		list_add(&match->list, &fanout_list);
	}
	err = -EINVAL;

	spin_lock(&po->bind_lock);
	if (po->running &&
	    match->type == type &&
	    match->prot_hook.type == po->prot_hook.type &&
	    match->prot_hook.dev == po->prot_hook.dev) {
		err = -ENOSPC;
		if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
			__dev_remove_pack(&po->prot_hook);
			po->fanout = match;
			po->rollover = rollover;
			rollover = NULL;
			refcount_set(&match->sk_ref,
				     refcount_read(&match->sk_ref) + 1);
			__fanout_link(sk, po);
			err = 0;
		}
	}
	spin_unlock(&po->bind_lock);

	if (err && !refcount_read(&match->sk_ref)) {
		list_del(&match->list);
		kfree(match);
	}

out:
	kfree(rollover);
	mutex_unlock(&fanout_mutex);
	return err;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 340 | 53.21% | 3 | 15.00%
Willem de Bruijn | 144 | 22.54% | 7 | 35.00%
Mike Maloney | 66 | 10.33% | 2 | 10.00%
Eric Dumazet | 55 | 8.61% | 2 | 10.00%
Elena Reshetova | 13 | 2.03% | 1 | 5.00%
Eric Leblond | 8 | 1.25% | 1 | 5.00%
Johann Baudy | 6 | 0.94% | 1 | 5.00%
Daniel Borkmann | 3 | 0.47% | 1 | 5.00%
Neil Horman | 3 | 0.47% | 1 | 5.00%
Ulisses Alonso Camaró | 1 | 0.16% | 1 | 5.00%
Total | 639 | 100.00% | 20 | 100.00%
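
A user-space sketch of joining a fanout group through the code above (the id and demux mode are illustrative): the lower 16 bits of the option value carry the group id, the upper 16 bits the type and flags that fanout_add() unpacks.

	unsigned int id = 42;
	unsigned int type_flags = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER;
	unsigned int arg = id | (type_flags << 16);

	setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &arg, sizeof(arg));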

/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
 * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
 * It is the responsibility of the caller to call fanout_release_data() and
 * free the returned packet_fanout (after synchronize_net())
 */
static struct packet_fanout *fanout_release(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_fanout *f;

	mutex_lock(&fanout_mutex);
	f = po->fanout;
	if (f) {
		po->fanout = NULL;

		if (refcount_dec_and_test(&f->sk_ref))
			list_del(&f->list);
		else
			f = NULL;
	}
	mutex_unlock(&fanout_mutex);

	return f;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
David S. Miller | 34 | 40.48% | 1 | 11.11%
Eric Dumazet | 14 | 16.67% | 1 | 11.11%
Linus Torvalds (pre-git) | 13 | 15.48% | 1 | 11.11%
Anoob Soman | 9 | 10.71% | 1 | 11.11%
Pavel Emelyanov | 6 | 7.14% | 1 | 11.11%
Richard Cochran | 4 | 4.76% | 1 | 11.11%
James Morris | 2 | 2.38% | 1 | 11.11%
Elena Reshetova | 1 | 1.19% | 1 | 11.11%
Adrian Bunk | 1 | 1.19% | 1 | 11.11%
Total | 84 | 100.00% | 9 | 100.00%


static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
					  struct sk_buff *skb)
{
	/* Earlier code assumed this would be a VLAN pkt, double-check
	 * this now that we have the actual packet in hand. We can only
	 * do this check on Ethernet devices.
	 */
	if (unlikely(dev->type != ARPHRD_ETHER))
		return false;

	skb_reset_mac_header(skb);
	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Daniel Borkmann | 53 | 100.00% | 1 | 100.00%
Total | 53 | 100.00% | 1 | 100.00%

static const struct proto_ops packet_ops;
static const struct proto_ops packet_ops_spkt;
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 141 | 73.44% | 5 | 27.78%
Herbert Xu | 7 | 3.65% | 1 | 5.56%
David S. Miller | 6 | 3.12% | 2 | 11.11%
Hideaki Yoshifuji / 吉藤英明 | 6 | 3.12% | 2 | 11.11%
Denis V. Lunev | 6 | 3.12% | 1 | 5.56%
Phil Oester | 6 | 3.12% | 1 | 5.56%
Octavian Purdila | 5 | 2.60% | 1 | 5.56%
Stephen Hemminger | 5 | 2.60% | 1 | 5.56%
Eric Dumazet | 5 | 2.60% | 2 | 11.11%
Arnaldo Carvalho de Melo | 4 | 2.08% | 1 | 5.56%
Benjamin Collins | 1 | 0.52% | 1 | 5.56%
Total | 192 | 100.00% | 18 | 100.00%

/* * Output a raw packet to a device layer. This bypasses all the other * protocol layers and you must therefore supply it with a complete frame */
static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; /* * Get and verify the address. */ if (saddr) { if (msg->msg_namelen < sizeof(struct sockaddr)) return -EINVAL; if (msg->msg_namelen == sizeof(struct sockaddr_pkt)) proto = saddr->spkt_protocol; } else return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */ /* * Find the device first to size check it */ saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0; retry: rcu_read_lock(); dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; err = -ENETDOWN; if (!(dev->flags & IFF_UP)) goto out_unlock; /* * You may not queue a frame bigger than the mtu. This is the lowest level * raw protocol and you must do your own fragmentation at this level. */ if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { size_t reserved = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; rcu_read_unlock(); skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL); if (skb == NULL) return -ENOBUFS; /* FIXME: Save some space for broken drivers that write a hard * header at transmission time by themselves. PPP is the notable * one here. This should really be fixed at the driver level. */ skb_reserve(skb, reserved); skb_reset_network_header(skb); /* Try to align data part correctly */ if (hhlen) { skb->data -= hhlen; skb->tail -= hhlen; if (len < hhlen) skb_reset_network_header(skb); } err = memcpy_from_msg(skb_put(skb, len), msg, len); if (err) goto out_free; goto retry; } if (!dev_validate_header(dev, skb->data, len)) { err = -EINVAL; goto out_unlock; } if (len > (dev->mtu + dev->hard_header_len + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_unlock; } sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; skb_probe_transport_header(skb, 0); dev_queue_xmit(skb); rcu_read_unlock(); return len; out_unlock: rcu_read_unlock(); out_free: kfree_skb(skb); return err; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 244 | 42.66% | 5 | 15.15%
Ben Greear | 88 | 15.38% | 2 | 6.06%
Eric Dumazet | 76 | 13.29% | 4 | 12.12%
Soheil Hassas Yeganeh | 43 | 7.52% | 1 | 3.03%
Willem de Bruijn | 24 | 4.20% | 1 | 3.03%
David S. Miller | 19 | 3.32% | 1 | 3.03%
Daniel Borkmann | 13 | 2.27% | 2 | 6.06%
Richard Cochran | 11 | 1.92% | 1 | 3.03%
Herbert Xu | 9 | 1.57% | 1 | 3.03%
Alexey Kuznetsov | 7 | 1.22% | 1 | 3.03%
Jason (Hui) Wang | 7 | 1.22% | 2 | 6.06%
Steffen Hurrle | 7 | 1.22% | 1 | 3.03%
Arnaldo Carvalho de Melo | 7 | 1.22% | 2 | 6.06%
Hideaki Yoshifuji / 吉藤英明 | 5 | 0.87% | 2 | 6.06%
Oliver Hartkopp | 4 | 0.70% | 1 | 3.03%
Yoshihiro Shimoda | 3 | 0.52% | 1 | 3.03%
Al Viro | 2 | 0.35% | 2 | 6.06%
Denis V. Lunev | 1 | 0.17% | 1 | 3.03%
Stephen Hemminger | 1 | 0.17% | 1 | 3.03%
Eric W. Biedermann | 1 | 0.17% | 1 | 3.03%
Total | 572 | 100.00% | 33 | 100.00%


static unsigned int run_filter(struct sk_buff *skb, const struct sock *sk, unsigned int res) { struct sk_filter *filter; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) res = bpf_prog_run_clear_cb(filter->prog, skb); rcu_read_unlock(); return res; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Mitchell Blank Jr. | 43 | 69.35% | 1 | 14.29%
Eric Dumazet | 6 | 9.68% | 3 | 42.86%
Dmitry Mishin | 5 | 8.06% | 1 | 14.29%
David S. Miller | 5 | 8.06% | 1 | 14.29%
Alexei Starovoitov | 3 | 4.84% | 1 | 14.29%
Total | 62 | 100.00% | 7 | 100.00%
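
The filter run here is whatever classic BPF program userspace attached with SO_ATTACH_FILTER; its return value caps how many bytes of the packet are kept. A sketch with a trivial accept-everything program (real filters are usually generated, e.g. by libpcap):

#include <sys/socket.h>
#include <linux/filter.h>

static int attach_accept_all(int fd)
{
	/* single instruction: return ~0, i.e. keep the whole packet */
	struct sock_filter insns[] = {
		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },
	};
	struct sock_fprog prog = {
		.len	= sizeof(insns) / sizeof(insns[0]),
		.filter	= insns,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}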


static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, size_t *len) { struct virtio_net_hdr vnet_hdr; if (*len < sizeof(vnet_hdr)) return -EINVAL; *len -= sizeof(vnet_hdr); if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true)) return -EINVAL; return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr)); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 67 | 79.76% | 1 | 16.67%
Linus Torvalds (pre-git) | 9 | 10.71% | 2 | 33.33%
Jarno Rajahalme | 4 | 4.76% | 1 | 16.67%
David S. Miller | 2 | 2.38% | 1 | 16.67%
Jason (Hui) Wang | 2 | 2.38% | 1 | 16.67%
Total | 84 | 100.00% | 6 | 100.00%
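
packet_rcv_vnet() only runs when PACKET_VNET_HDR was enabled on the socket, in which case every received buffer is prefixed with a struct virtio_net_hdr describing offload state. A receive-side sketch (helper name illustrative; error handling trimmed):

#include <sys/socket.h>
#include <string.h>
#include <linux/if_packet.h>
#include <linux/virtio_net.h>

static ssize_t recv_with_vnet_hdr(int fd, void *buf, size_t len,
				  struct virtio_net_hdr *vh)
{
	/* assumes: int on = 1;
	 * setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &on, sizeof(on)); */
	ssize_t n = recv(fd, buf, len, 0);

	if (n >= (ssize_t)sizeof(*vh))
		memcpy(vh, buf, sizeof(*vh));	/* header precedes the frame */
	return n;
}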

/* * This function makes lazy skb cloning in the hope that most packets * are discarded by BPF. * * Note the tricky part: we DO mangle shared skb! skb->data, skb->len * and skb->cb are mangled. It works because (and until) packets * falling here are owned by the current CPU. Output packets are cloned * by dev_queue_xmit_nit(), input packets are processed by net_bh * sequentially, so that if we return skb to its original state on exit, * we will not harm anyone. */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_ll *sll; struct packet_sock *po; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; bool is_drop_n_account = false; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; skb->dev = dev; if (dev->header_ops) { /* The device has an explicit notion of ll header, * exported to higher levels. * * Otherwise, the device hides details of its frame * structure, so that corresponding packet head is * never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb->len; res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; if (snaplen > res) snaplen = res; if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto drop_n_acct; if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (nskb == NULL) goto drop_n_acct; if (skb_head != skb->data) { skb->data = skb_head; skb->len = skb_len; } consume_skb(skb); skb = nskb; } sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8); sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; if (unlikely(po->origdev)) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; sll->sll_halen = dev_parse_header(skb, sll->sll_addr); /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg(). * Use their space for storing the original skb length. */ PACKET_SKB_CB(skb)->sa.origlen = skb->len; if (pskb_trim(skb, snaplen)) goto drop_n_acct; skb_set_owner_r(skb, sk); skb->dev = NULL; skb_dst_drop(skb); /* drop conntrack reference */ nf_reset(skb); spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; sock_skb_set_dropcount(sk, skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); return 0; drop_n_acct: is_drop_n_account = true; spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_drops++; atomic_inc(&sk->sk_drops); spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: if (!is_drop_n_account) consume_skb(skb); else kfree_skb(skb); return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 299 | 53.01% | 2 | 9.09%
Linus Torvalds (pre-git) | 169 | 29.96% | 5 | 22.73%
Weongyo Jeong | 20 | 3.55% | 1 | 4.55%
Herbert Xu | 20 | 3.55% | 2 | 9.09%
Peter P. Waskiewicz Jr | 18 | 3.19% | 1 | 4.55%
Eyal Birger | 6 | 1.06% | 2 | 9.09%
Neil Horman | 6 | 1.06% | 2 | 9.09%
Phil Oester | 6 | 1.06% | 1 | 4.55%
Linus Torvalds | 6 | 1.06% | 1 | 4.55%
Stephen Hemminger | 5 | 0.89% | 2 | 9.09%
Arnaldo Carvalho de Melo | 4 | 0.71% | 1 | 4.55%
Daniel Borkmann | 4 | 0.71% | 1 | 4.55%
Eric Dumazet | 1 | 0.18% | 1 | 4.55%
Total | 564 | 100.00% | 22 | 100.00%


static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_USER; unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timespec ts; __u32 ts_status; bool is_drop_n_account = false; bool do_vnet = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing * userspace to call getsockopt(..., PACKET_HDRLEN, ...). */ BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; if (dev->header_ops) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb->len; res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && (skb->ip_summed == CHECKSUM_COMPLETE || skb_csum_unnecessary(skb))) status |= TP_STATUS_CSUM_VALID; if (snaplen > res) snaplen = res; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + po->tp_reserve; } else { unsigned int maclen = skb_network_offset(skb); netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; if (po->has_vnet_hdr) { netoff += sizeof(struct virtio_net_hdr); do_vnet = true; } macoff = netoff - maclen; } if (po->tp_version <= TPACKET_V2) { if (macoff + snaplen > po->rx_ring.frame_size) { if (po->copy_thresh && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { if (skb_shared(skb)) { copy_skb = skb_clone(skb, GFP_ATOMIC); } else { copy_skb = skb_get(skb); skb_head = skb->data; } if (copy_skb) skb_set_owner_r(copy_skb, sk); } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { snaplen = 0; do_vnet = false; } } } else if (unlikely(macoff + snaplen > GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { u32 nval; nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", snaplen, nval, macoff); snaplen = nval; if (unlikely((int)snaplen < 0)) { snaplen = 0; macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; do_vnet = false; } } spin_lock(&sk->sk_receive_queue.lock); h.raw = packet_current_rx_frame(po, skb, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* * LOSING will be reported till you read the stats, * because it's COR - Clear On Read. * Anyways, moving it for V1/V2 only as V3 doesn't need this * at packet level. */ if (po->stats.stats1.tp_drops) status |= TP_STATUS_LOSING; } po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } spin_unlock(&sk->sk_receive_queue.lock); if (do_vnet) { if (virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), vio_le(), true)) { spin_lock(&sk->sk_receive_queue.lock); goto drop_n_account; } } skb_copy_bits(skb, 0, h.raw + macoff, snaplen); if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) getnstimeofday(&ts); status |= ts_status; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_len = skb->len; h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; hdrlen = sizeof(*h.h1); break; case TPACKET_V2: h.h2->tp_len = skb->len; h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; if (skb_vlan_tag_present(skb)) { h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; h.h2->tp_vlan_tpid = 0; } memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); hdrlen = sizeof(*h.h2); break; case TPACKET_V3: /* tp_nxt_offset,vlan are already populated above. * So DONT clear those fields here */ h.h3->tp_status |= status; h.h3->tp_len = skb->len; h.h3->tp_snaplen = snaplen; h.h3->tp_mac = macoff; h.h3->tp_net = netoff; h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); hdrlen = sizeof(*h.h3); break; default: BUG(); } sll = h.raw + TPACKET_ALIGN(hdrlen); sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(po->origdev)) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; smp_mb(); #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 if (po->tp_version <= TPACKET_V2) { u8 *start, *end; end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + macoff + snaplen); for (start = h.raw; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); } smp_wmb(); #endif if (po->tp_version <= TPACKET_V2) { __packet_set_status(po, h.raw, status); sk->sk_data_ready(sk); } else { prb_clear_blk_fill_status(&po->rx_ring); } drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: if (!is_drop_n_account) consume_skb(skb); else kfree_skb(skb); return 0; drop_n_account: is_drop_n_account = true; po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); kfree_skb(copy_skb); goto drop_n_restore; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 446 | 31.63% | 3 | 5.26%
Chetan Loke | 162 | 11.49% | 2 | 3.51%
Patrick McHardy | 153 | 10.85% | 4 | 7.02%
Eric Dumazet | 100 | 7.09% | 5 | 8.77%
Atzm Watanabe | 89 | 6.31% | 3 | 5.26%
Johann Baudy | 86 | 6.10% | 1 | 1.75%
Willem de Bruijn | 54 | 3.83% | 1 | 1.75%
Daniel Borkmann | 53 | 3.76% | 6 | 10.53%
Alexander Drozdov | 38 | 2.70% | 2 | 3.51%
Changli Gao | 36 | 2.55% | 2 | 3.51%
David S. Miller | 29 | 2.06% | 4 | 7.02%
Ben Greear | 23 | 1.63% | 1 | 1.75%
Benjamin Poirier | 22 | 1.56% | 1 | 1.75%
Weongyo Jeong | 20 | 1.42% | 1 | 1.75%
Peter P. Waskiewicz Jr | 18 | 1.28% | 1 | 1.75%
Arnaldo Carvalho de Melo | 17 | 1.21% | 4 | 7.02%
Linus Torvalds | 15 | 1.06% | 2 | 3.51%
Dan Collins | 11 | 0.78% | 1 | 1.75%
Denis V. Lunev | 9 | 0.64% | 1 | 1.75%
Hideaki Yoshifuji / 吉藤英明 | 6 | 0.43% | 2 | 3.51%
Octavian Purdila | 5 | 0.35% | 1 | 1.75%
Jarno Rajahalme | 4 | 0.28% | 1 | 1.75%
Ulisses Alonso Camaró | 3 | 0.21% | 1 | 1.75%
Stephen Hemminger | 2 | 0.14% | 2 | 3.51%
Jiri Pirko | 2 | 0.14% | 1 | 1.75%
Jason (Hui) Wang | 2 | 0.14% | 1 | 1.75%
Mitchell Blank Jr. | 2 | 0.14% | 1 | 1.75%
Herbert Xu | 2 | 0.14% | 1 | 1.75%
Ralf Bächle | 1 | 0.07% | 1 | 1.75%
Total | 1410 | 100.00% | 57 | 100.00%
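
tpacket_rcv() writes into frames that userspace mapped with PACKET_RX_RING, so the receive path needs no copies into user buffers. A minimal TPACKET_V3 ring setup sketch (sizes and timeout are arbitrary example values; error handling omitted):

#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_packet.h>

static void *setup_rx_ring_v3(int fd)
{
	int version = TPACKET_V3;
	struct tpacket_req3 req = {
		.tp_block_size	   = 1 << 22,		/* 4 MiB blocks */
		.tp_block_nr	   = 64,
		.tp_frame_size	   = 2048,		/* must divide tp_block_size */
		.tp_frame_nr	   = (1 << 22) / 2048 * 64,
		.tp_retire_blk_tov = 60,		/* retire a block after 60 ms */
	};

	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version));
	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

	/* one mapping covering all blocks, shared with the kernel */
	return mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}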


static void tpacket_destruct_skb(struct sk_buff *skb) { struct packet_sock *po = pkt_sk(skb->sk); if (likely(po->tx_ring.pg_vec)) { void *ph; __u32 ts; ph = skb_shinfo(skb)->destructor_arg; packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); } sock_wfree(skb); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 87 | 100.00% | 1 | 100.00%
Total | 87 | 100.00% | 1 | 100.00%


static void tpacket_set_protocol(const struct net_device *dev, struct sk_buff *skb) { if (dev->type == ARPHRD_ETHER) { skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; } }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 43 | 100.00% | 1 | 100.00%
Total | 43 | 100.00% | 1 | 100.00%


static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) { if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len))) vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(), __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2); if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) return -EINVAL; return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 111 | 100.00% | 1 | 100.00%
Total | 111 | 100.00% | 1 | 100.00%


static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, struct virtio_net_hdr *vnet_hdr) { if (*len < sizeof(*vnet_hdr)) return -EINVAL; *len -= sizeof(*vnet_hdr); if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter)) return -EFAULT; return __packet_snd_vnet_parse(vnet_hdr, *len); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 55 | 72.37% | 2 | 50.00%
Johann Baudy | 17 | 22.37% | 1 | 25.00%
Al Viro | 4 | 5.26% | 1 | 25.00%
Total | 76 | 100.00% | 4 | 100.00%


static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, __be16 proto, unsigned char *addr, int hlen, int copylen, const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; int err; ph.raw = frame; skb->protocol = proto; skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; skb_reserve(skb, hlen); skb_reset_network_header(skb); to_write = tp_len; if (sock->type == SOCK_DGRAM) { err = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, tp_len); if (unlikely(err < 0)) return -EINVAL; } else if (copylen) { int hdrlen = min_t(int, copylen, tp_len); skb_push(skb, dev->hard_header_len); skb_put(skb, copylen - dev->hard_header_len); err = skb_store_bits(skb, 0, data, hdrlen); if (unlikely(err)) return err; if (!dev_validate_header(dev, skb->data, hdrlen)) return -EINVAL; if (!skb->protocol) tpacket_set_protocol(dev, skb); data += hdrlen; to_write -= hdrlen; } offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); skb->data_len = to_write; skb->len += to_write; skb->truesize += to_write; refcount_add(to_write, &po->sk.sk_wmem_alloc); while (likely(to_write)) { nr_frags = skb_shinfo(skb)->nr_frags; if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { pr_err("Packet exceed the number of skb frags(%lu)\n", MAX_SKB_FRAGS); return -EFAULT; } page = pgv_to_page(data); data += len; flush_dcache_page(page); get_page(page); skb_fill_page_desc(skb, nr_frags, page, offset, len); to_write -= len; offset = 0; len_max = PAGE_SIZE; len = ((to_write > len_max) ? len_max : to_write); } skb_probe_transport_header(skb, 0); return tp_len; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Johann Baudy | 273 | 56.52% | 1 | 9.09%
Willem de Bruijn | 168 | 34.78% | 4 | 36.36%
Daniel Borkmann | 17 | 3.52% | 2 | 18.18%
Soheil Hassas Yeganeh | 10 | 2.07% | 1 | 9.09%
Eric Dumazet | 10 | 2.07% | 1 | 9.09%
Herbert Xu | 4 | 0.83% | 1 | 9.09%
Elena Reshetova | 1 | 0.21% | 1 | 9.09%
Total | 483 | 100.00% | 11 | 100.00%


static int tpacket_parse_header(struct packet_sock *po, void *frame, int size_max, void **data) { union tpacket_uhdr ph; int tp_len, off; ph.raw = frame; switch (po->tp_version) { case TPACKET_V3: if (ph.h3->tp_next_offset != 0) { pr_warn_once("variable sized slot not supported"); return -EINVAL; } tp_len = ph.h3->tp_len; break; case TPACKET_V2: tp_len = ph.h2->tp_len; break; default: tp_len = ph.h1->tp_len; break; } if (unlikely(tp_len > size_max)) { pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); return -EMSGSIZE; } if (unlikely(po->tp_tx_has_off)) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); off_max = po->tx_ring.frame_size - tp_len; if (po->sk.sk_type == SOCK_DGRAM) { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_net; break; case TPACKET_V2: off = ph.h2->tp_net; break; default: off = ph.h1->tp_net; break; } } else { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_mac; break; case TPACKET_V2: off = ph.h2->tp_mac; break; default: off = ph.h1->tp_mac; break; } } if (unlikely((off < off_min) || (off_max < off))) return -EINVAL; } else { off = po->tp_hdrlen - sizeof(struct sockaddr_ll); } *data = frame + off; return tp_len; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 210 | 68.18% | 1 | 20.00%
Sowmini Varadhan | 57 | 18.51% | 1 | 20.00%
Johann Baudy | 37 | 12.01% | 1 | 20.00%
Daniel Borkmann | 3 | 0.97% | 1 | 20.00%
Eric Dumazet | 1 | 0.32% | 1 | 20.00%
Total | 308 | 100.00% | 5 | 100.00%


static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int tp_len, size_max; unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; mutex_lock(&po->pg_vec_lock); if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } err = -ENXIO; if (unlikely(dev == NULL)) goto out; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; sockc.tsflags = po->sk.sk_tsflags; if (msg->msg_controllen) { err = sock_cmsg_send(&po->sk, msg, &sockc); if (unlikely(err)) goto out_put; } if (po->sk.sk_socket->type == SOCK_RAW) reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) size_max = dev->mtu + reserve + VLAN_HLEN; do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { if (need_wait && need_resched()) schedule(); continue; } skb = NULL; tp_len = tpacket_parse_header(po, ph, size_max, &data); if (tp_len < 0) goto tpacket_error; status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; if (po->has_vnet_hdr) { vnet_hdr = data; data += sizeof(*vnet_hdr); tp_len -= sizeof(*vnet_hdr); if (tp_len < 0 || __packet_snd_vnet_parse(vnet_hdr, tp_len)) { tp_len = -EINVAL; goto tpacket_error; } copylen = __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len); } copylen = max_t(int, copylen, dev->hard_header_len); skb = sock_alloc_send_skb(&po->sk, hlen + tlen + sizeof(struct sockaddr_ll) + (copylen - dev->hard_header_len), !need_wait, &err); if (unlikely(skb == NULL)) { /* we assume the socket was initially writeable ... */ if (likely(len_sum > 0)) err = len_sum; goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !po->has_vnet_hdr && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { tpacket_error: if (po->tp_loss) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); kfree_skb(skb); continue; } else { status = TP_STATUS_WRONG_FORMAT; err = tp_len; goto out_status; } } if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; } skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; err = po->xmit(skb); if (unlikely(err > 0)) { err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ skb = NULL; goto out_status; } /* * skb was dropped but not destructed yet; * let's treat it like congestion or err < 0 */ err = 0; } packet_increment_head(&po->tx_ring); len_sum += tp_len; } while (likely((ph != NULL) || /* Note: packet_read_pending() might be slow if we have * to call it as it's per_cpu variable, but in fast-path * we already short-circuit the loop with the first * condition, and luckily don't have to go that path * anyway. */ (need_wait && packet_read_pending(&po->tx_ring)))); err = len_sum; goto out_put; out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Johann Baudy | 421 | 46.83% | 1 | 3.57%
Willem de Bruijn | 167 | 18.58% | 2 | 7.14%
Daniel Borkmann | 78 | 8.68% | 9 | 32.14%
Soheil Hassas Yeganeh | 42 | 4.67% | 1 | 3.57%
Douglas Caetano dos Santos | 40 | 4.45% | 1 | 3.57%
Jarek Poplawski | 29 | 3.23% | 1 | 3.57%
Herbert Xu | 23 | 2.56% | 1 | 3.57%
David S. Miller | 22 | 2.45% | 1 | 3.57%
Linus Torvalds (pre-git) | 21 | 2.34% | 3 | 10.71%
Mathias Kretschmer | 18 | 2.00% | 1 | 3.57%
Linus Torvalds | 8 | 0.89% | 1 | 3.57%
Steffen Hurrle | 7 | 0.78% | 1 | 3.57%
Alexander Drozdov | 7 | 0.78% | 1 | 3.57%
Yoshihiro Shimoda | 5 | 0.56% | 1 | 3.57%
Ben Greear | 5 | 0.56% | 1 | 3.57%
Jarno Rajahalme | 4 | 0.44% | 1 | 3.57%
Gabor Gombas | 2 | 0.22% | 1 | 3.57%
Total | 899 | 100.00% | 28 | 100.00%
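
The loop above transmits every frame whose status userspace set to TP_STATUS_SEND_REQUEST. A TPACKET_V2 sketch of queueing one frame and kicking the kernel (ring setup mirrors the RX side; the data offset follows Documentation/networking/packet_mmap.txt; helper name illustrative):

#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

/* frame points at a free slot in the mmap'ed TX ring */
static int queue_tx_frame(int fd, void *frame,
			  const void *pkt, unsigned int len)
{
	struct tpacket2_hdr *hdr = frame;
	char *data = (char *)frame +
		     TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);

	memcpy(data, pkt, len);		/* a complete link-layer frame */
	hdr->tp_len = len;
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	/* wakes the loop in tpacket_snd(); MSG_DONTWAIT also works */
	return send(fd, NULL, 0, 0);
}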


static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, size_t reserve, size_t len, size_t linear, int noblock, int *err) { struct sk_buff *skb; /* Under a page? Don't bother with paged skb. */ if (prepad + len < PAGE_SIZE || !linear) linear = len; skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, err, 0); if (!skb) return NULL; skb_reserve(skb, reserve); skb_put(skb, linear); skb->data_len = len - linear; skb->len += len - linear; return skb; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Sridhar Samudrala | 113 | 98.26% | 1 | 50.00%
Eric Dumazet | 2 | 1.74% | 1 | 50.00%
Total | 115 | 100.00% | 2 | 100.00%


static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); struct sk_buff *skb; struct net_device *dev; __be16 proto; unsigned char *addr; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); bool has_vnet_hdr = false; int hlen, tlen, linear; int extra_len = 0; /* * Get and verify the address. */ if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); } err = -ENXIO; if (unlikely(dev == NULL)) goto out_unlock; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; sockc.tsflags = sk->sk_tsflags; sockc.mark = sk->sk_mark; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; if (po->has_vnet_hdr) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; has_vnet_hdr = true; } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); linear = max(linear, min_t(int, len, dev->hard_header_len)); skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; skb_set_network_header(skb, reserve); err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; } /* Returns -EFAULT on error */ err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len); if (err) goto out_free; if (sock->type == SOCK_RAW && !dev_validate_header(dev, skb->data, len)) { err = -EINVAL; goto out_free; } sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_free; } skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sockc.mark; if (has_vnet_hdr) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) goto out_free; len += sizeof(vnet_hdr); } skb_probe_transport_header(skb, reserve); if (unlikely(extra_len == 4)) skb->no_fcs = 1; err = po->xmit(skb); if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: if (dev) dev_put(dev); out: return err; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Sridhar Samudrala | 194 | 24.37% | 1 | 2.27%
Willem de Bruijn | 188 | 23.62% | 5 | 11.36%
Linus Torvalds (pre-git) | 169 | 21.23% | 4 | 9.09%
Ben Greear | 60 | 7.54% | 3 | 6.82%
David S. Miller | 32 | 4.02% | 3 | 6.82%
Eric W. Biedermann | 23 | 2.89% | 1 | 2.27%
Herbert Xu | 22 | 2.76% | 1 | 2.27%
Phil Sutter | 19 | 2.39% | 2 | 4.55%
Daniel Borkmann | 14 | 1.76% | 4 | 9.09%
Richard Cochran | 11 | 1.38% | 1 | 2.27%
Jason (Hui) Wang | 9 | 1.13% | 2 | 4.55%
Soheil Hassas Yeganeh | 9 | 1.13% | 1 | 2.27%
Edward Hyunkoo Jee | 7 | 0.88% | 1 | 2.27%
Steffen Hurrle | 7 | 0.88% | 1 | 2.27%
Michael S. Tsirkin | 5 | 0.63% | 1 | 2.27%
Eric Dumazet | 5 | 0.63% | 1 | 2.27%
Jarno Rajahalme | 4 | 0.50% | 1 | 2.27%
Oliver Hartkopp | 4 | 0.50% | 1 | 2.27%
Al Viro | 4 | 0.50% | 3 | 6.82%
Yoshihiro Shimoda | 3 | 0.38% | 1 | 2.27%
Arnaldo Carvalho de Melo | 2 | 0.25% | 2 | 4.55%
Hideaki Yoshifuji / 吉藤英明 | 2 | 0.25% | 1 | 2.27%
Christoph Jaeger | 1 | 0.13% | 1 | 2.27%
Stephen Hemminger | 1 | 0.13% | 1 | 2.27%
Johann Baudy | 1 | 0.13% | 1 | 2.27%
Total | 796 | 100.00% | 44 | 100.00%
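
packet_snd() is the plain (non-ring) transmit path behind sendto() on a packet socket. A userspace sketch for SOCK_RAW, where frame must already contain the full link-layer frame (helper name illustrative):

#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

static ssize_t send_raw_frame(int fd, int ifindex,
			      const void *frame, size_t len)
{
	struct sockaddr_ll sll;

	memset(&sll, 0, sizeof(sll));
	sll.sll_family	 = AF_PACKET;
	sll.sll_protocol = htons(ETH_P_ALL);
	sll.sll_ifindex	 = ifindex;

	return sendto(fd, frame, len, 0,
		      (struct sockaddr *)&sll, sizeof(sll));
}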


static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); if (po->tx_ring.pg_vec) return tpacket_snd(po, msg); else return packet_snd(sock, msg, len); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Johann Baudy | 65 | 100.00% | 1 | 100.00%
Total | 65 | 100.00% | 1 | 100.00%

/* * Close a PACKET socket. This is fairly simple. We immediately go * to 'closed' state and remove our protocol entry in the device list. */
static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; if (!sk) return 0; net = sock_net(sk); po = pkt_sk(sk); mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); preempt_enable(); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); packet_cached_dev_reset(po); if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); packet_flush_mclist(sk); if (po->rx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); } if (po->tx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); } f = fanout_release(sk); synchronize_net(); if (f) { kfree(po->rollover); fanout_release_data(f); kfree(f); } /* * Now the socket is dead. No more input will appear. */ sock_orphan(sk); sock->sk = NULL; /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); packet_free_pending(po); sk_refcnt_debug_release(sk); sock_put(sk); return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 88 | 28.76% | 3 | 12.00%
Johann Baudy | 27 | 8.82% | 1 | 4.00%
Ben Greear | 27 | 8.82% | 1 | 4.00%
Phil Sutter | 25 | 8.17% | 1 | 4.00%
Anoob Soman | 23 | 7.52% | 1 | 4.00%
Pavel Emelyanov | 21 | 6.86% | 2 | 8.00%
Stephen Hemminger | 20 | 6.54% | 1 | 4.00%
Denis V. Lunev | 14 | 4.58% | 2 | 8.00%
Eric Dumazet | 12 | 3.92% | 1 | 4.00%
David S. Miller | 11 | 3.59% | 3 | 12.00%
Daniel Borkmann | 10 | 3.27% | 2 | 8.00%
Dave Jones | 7 | 2.29% | 1 | 4.00%
Chetan Loke | 7 | 2.29% | 1 | 4.00%
Mike Maloney | 7 | 2.29% | 1 | 4.00%
Arnaldo Carvalho de Melo | 4 | 1.31% | 3 | 12.00%
Hideaki Yoshifuji / 吉藤英明 | 3 | 0.98% | 1 | 4.00%
Total | 306 | 100.00% | 25 | 100.00%

/* * Attach a packet hook. */
static int packet_do_bind(struct sock *sk, const char *name, int ifindex, __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; struct net_device *dev = NULL; int ret = 0; bool unlisted = false; lock_sock(sk); spin_lock(&po->bind_lock); rcu_read_lock(); if (po->fanout) { ret = -EINVAL; goto out_unlock; } if (name) { dev = dev_get_by_name_rcu(sock_net(sk), name); if (!dev) { ret = -ENODEV; goto out_unlock; } } else if (ifindex) { dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (!dev) { ret = -ENODEV; goto out_unlock; } } if (dev) dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; need_rehook = proto_curr != proto || dev_curr != dev; if (need_rehook) { if (po->running) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() */ po->num = 0; __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; if (dev) unlisted = !dev_get_by_index_rcu(sock_net(sk), dev->ifindex); } BUG_ON(po->running); po->num = proto; po->prot_hook.type = proto; if (unlikely(unlisted)) { dev_put(dev); po->prot_hook.dev = NULL; po->ifindex = -1; packet_cached_dev_reset(po); } else { po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; packet_cached_dev_assign(po, dev); } } if (dev_curr) dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); } out_unlock: rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); return ret; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Francesco Ruggeri | 190 | 43.88% | 1 | 5.88%
Linus Torvalds (pre-git) | 111 | 25.64% | 2 | 11.76%
Daniel Borkmann | 57 | 13.16% | 2 | 11.76%
David S. Miller | 17 | 3.93% | 2 | 11.76%
Willem de Bruijn | 16 | 3.70% | 1 | 5.88%
Eric Dumazet | 14 | 3.23% | 1 | 5.88%
Lars Westerhoff | 9 | 2.08% | 1 | 5.88%
Arnaldo Carvalho de Melo | 6 | 1.39% | 3 | 17.65%
Stephen Hemminger | 6 | 1.39% | 1 | 5.88%
Urs Thuermann | 4 | 0.92% | 1 | 5.88%
James Morris | 2 | 0.46% | 1 | 5.88%
Al Viro | 1 | 0.23% | 1 | 5.88%
Total | 433 | 100.00% | 17 | 100.00%

/* * Bind a packet socket to a device */
static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality */ if (addr_len != sizeof(struct sockaddr)) return -EINVAL; /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 64 | 62.75% | 2 | 22.22%
Alexander Potapenko | 25 | 24.51% | 1 | 11.11%
Benjamin Collins | 4 | 3.92% | 1 | 11.11%
David S. Miller | 3 | 2.94% | 1 | 11.11%
Francesco Ruggeri | 3 | 2.94% | 1 | 11.11%
Eric W. Biedermann | 1 | 0.98% | 1 | 11.11%
Denis V. Lunev | 1 | 0.98% | 1 | 11.11%
Hideaki Yoshifuji / 吉藤英明 | 1 | 0.98% | 1 | 11.11%
Total | 102 | 100.00% | 9 | 100.00%


static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; /* * Check legality */ if (addr_len < sizeof(struct sockaddr_ll)) return -EINVAL; if (sll->sll_family != AF_PACKET) return -EINVAL; return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol ? : pkt_sk(sk)->num); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 81 | 89.01% | 2 | 28.57%
Francesco Ruggeri | 4 | 4.40% | 1 | 14.29%
David S. Miller | 3 | 3.30% | 1 | 14.29%
Eric W. Biedermann | 1 | 1.10% | 1 | 14.29%
Denis V. Lunev | 1 | 1.10% | 1 | 14.29%
Hideaki Yoshifuji / 吉藤英明 | 1 | 1.10% | 1 | 14.29%
Total | 91 | 100.00% | 7 | 100.00%
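
A sketch of the matching userspace bind(); as packet_do_bind() shows, passing sll_protocol == 0 keeps the socket's current protocol, and an ifindex of 0 means "any interface" (interface name and helper name are illustrative):

#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

static int bind_packet_socket(int fd, const char *ifname)
{
	struct sockaddr_ll sll;

	memset(&sll, 0, sizeof(sll));
	sll.sll_family	 = AF_PACKET;
	sll.sll_protocol = htons(ETH_P_ALL);
	sll.sll_ifindex	 = if_nametoindex(ifname);

	return bind(fd, (struct sockaddr *)&sll, sizeof(sll));
}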

static struct proto packet_proto = { .name = "PACKET", .owner = THIS_MODULE, .obj_size = sizeof(struct packet_sock), }; /* * Create a packet of type SOCK_PACKET. */
static int packet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct packet_sock *po; __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && sock->type != SOCK_PACKET) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; err = -ENOBUFS; sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; sock->ops = &packet_ops; if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; sock_init_data(sock, sk); po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; err = packet_alloc_pending(po); if (err) goto out2; packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); /* * Attach a protocol block */ spin_lock_init(&po->bind_lock); mutex_init(&po->pg_vec_lock); po->rollover = NULL; po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; if (proto) { po->prot_hook.type = proto; register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); sk_add_node_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); sock_prot_inuse_add(net, &packet_proto, 1); preempt_enable(); return 0; out2: sk_free(sk); out: return err; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 201 | 58.77% | 3 | 10.34%
Daniel Borkmann | 32 | 9.36% | 4 | 13.79%
Pavel Emelyanov | 18 | 5.26% | 2 | 6.90%
David S. Miller | 14 | 4.09% | 4 | 13.79%
Eric W. Biedermann | 14 | 4.09% | 3 | 10.34%
Al Viro | 13 | 3.80% | 1 | 3.45%
Arnaldo Carvalho de Melo | 11 | 3.22% | 4 | 13.79%
Eric Dumazet | 11 | 3.22% | 2 | 6.90%
Denis V. Lunev | 10 | 2.92% | 2 | 6.90%
Herbert Xu | 8 | 2.34% | 1 | 3.45%
Willem de Bruijn | 6 | 1.75% | 1 | 3.45%
Eric Paris | 3 | 0.88% | 1 | 3.45%
Stephen Hemminger | 1 | 0.29% | 1 | 3.45%
Total | 342 | 100.00% | 29 | 100.00%

/* * Pull a packet from our receive queue and hand it to the user. * If necessary we block. */
static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; int vnet_hdr_len = 0; unsigned int origlen = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) goto out; #if 0 /* What error should we return now? EUNATTACH? */ if (pkt_sk(sk)->ifindex < 0) return -ENODEV; #endif if (flags & MSG_ERRQUEUE) { err = sock_recv_errqueue(sk, msg, len, SOL_PACKET, PACKET_TX_TIMESTAMP); goto out; } /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. * * Now it will return ENETDOWN, if device have just gone down, * but then it will block. */ skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); /* * An error occurred so return it. Because skb_recv_datagram() * handles the blocking we don't see and worry about blocking * retries. */ if (skb == NULL) goto out; if (pkt_sk(sk)->pressure) packet_rcv_has_room(pkt_sk(sk), NULL); if (pkt_sk(sk)->has_vnet_hdr) { err = packet_rcv_vnet(msg, skb, &len); if (err) goto out_free; vnet_hdr_len = sizeof(struct virtio_net_hdr); } /* You lose any data beyond the buffer you gave. If it worries * a user program they can ask the device for its MTU * anyway. */ copied = skb->len; if (copied > len) { copied = len; msg->msg_flags |= MSG_TRUNC; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; if (sock->type != SOCK_PACKET) { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; /* Original length was stored in sockaddr_ll fields */ origlen = PACKET_SKB_CB(skb)->sa.origlen; sll->sll_family = AF_PACKET; sll->sll_protocol = skb->protocol; } sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && (skb->ip_summed == CHECKSUM_COMPLETE || skb_csum_unnecessary(skb))) aux.tp_status |= TP_STATUS_CSUM_VALID; aux.tp_len = origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); if (skb_vlan_tag_present(skb)) { aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0; } put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } /* * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. */ err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); out_free: skb_free_datagram(sk, skb); out: return err; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 140 | 23.97% | 4 | 14.29%
Sridhar Samudrala | 86 | 14.73% | 1 | 3.57%
Herbert Xu | 83 | 14.21% | 2 | 7.14%
Hannes Frederic Sowa | 58 | 9.93% | 1 | 3.57%
Eyal Birger | 57 | 9.76% | 1 | 3.57%
Richard Cochran | 28 | 4.79% | 2 | 7.14%
Alexander Drozdov | 28 | 4.79% | 1 | 3.57%
Willem de Bruijn | 28 | 4.79% | 2 | 7.14%
Ben Greear | 22 | 3.77% | 1 | 3.57%
Atzm Watanabe | 15 | 2.57% | 1 | 3.57%
Steffen Hurrle | 9 | 1.54% | 1 | 3.57%
Eric Dumazet | 7 | 1.20% | 2 | 7.14%
Patrick McHardy | 6 | 1.03% | 1 | 3.57%
Linus Torvalds | 4 | 0.68% | 1 | 3.57%
Arnaldo Carvalho de Melo | 3 | 0.51% | 1 | 3.57%
Michael S. Tsirkin | 3 | 0.51% | 1 | 3.57%
Jiri Pirko | 2 | 0.34% | 1 | 3.57%
Eric W. Biedermann | 2 | 0.34% | 1 | 3.57%
Stephen Hemminger | 1 | 0.17% | 1 | 3.57%
Neil Horman | 1 | 0.17% | 1 | 3.57%
David S. Miller | 1 | 0.17% | 1 | 3.57%
Total | 584 | 100.00% | 28 | 100.00%
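
The PACKET_AUXDATA branch above delivers per-packet metadata as ancillary data. A recvmsg() sketch that retrieves it (assumes the option was enabled with setsockopt(PACKET_AUXDATA); buffer size arbitrary):

#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static void recv_with_auxdata(int fd, void *buf, size_t len)
{
	union {
		struct cmsghdr align;
		char space[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
	} ctl;
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= ctl.space,
		.msg_controllen	= sizeof(ctl.space),
	};
	struct cmsghdr *cmsg;

	if (recvmsg(fd, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_PACKET &&
		    cmsg->cmsg_type == PACKET_AUXDATA) {
			struct tpacket_auxdata aux;

			memcpy(&aux, CMSG_DATA(cmsg), sizeof(aux));
			/* aux.tp_len: original wire length; aux.tp_snaplen: delivered */
		}
	}
}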


static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct net_device *dev; struct sock *sk = sock->sk; if (peer) return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); *uaddr_len = sizeof(*uaddr); return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 90 | 72.58% | 3 | 33.33%
Daniel Borkmann | 19 | 15.32% | 1 | 11.11%
Eric Dumazet | 7 | 5.65% | 1 | 11.11%
David S. Miller | 3 | 2.42% | 1 | 11.11%
Hideaki Yoshifuji / 吉藤英明 | 3 | 2.42% | 1 | 11.11%
Eric W. Biedermann | 1 | 0.81% | 1 | 11.11%
Denis V. Lunev | 1 | 0.81% | 1 | 11.11%
Total | 124 | 100.00% | 9 | 100.00%


static int packet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct net_device *dev; struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); if (peer) return -EOPNOTSUPP; sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; sll->sll_pkttype = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); } else { sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } rcu_read_unlock(); *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 139 | 75.14% | 2 | 16.67%
David S. Miller | 11 | 5.95% | 2 | 16.67%
Eric W. Biedermann | 10 | 5.41% | 2 | 16.67%
Eric Dumazet | 7 | 3.78% | 1 | 8.33%
Cyrill V. Gorcunov | 7 | 3.78% | 1 | 8.33%
Vasiliy Kulikov | 6 | 3.24% | 1 | 8.33%
Hideaki Yoshifuji / 吉藤英明 | 3 | 1.62% | 1 | 8.33%
Arnaldo Carvalho de Melo | 1 | 0.54% | 1 | 8.33%
Denis V. Lunev | 1 | 0.54% | 1 | 8.33%
Total | 185 | 100.00% | 12 | 100.00%


static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) { switch (i->type) { case PACKET_MR_MULTICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr); else return dev_mc_del(dev, i->addr); break; case PACKET_MR_PROMISC: return dev_set_promiscuity(dev, what); case PACKET_MR_ALLMULTI: return dev_set_allmulti(dev, what); case PACKET_MR_UNICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_uc_add(dev, i->addr); else return dev_uc_del(dev, i->addr); break; default: break; } return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 71 | 49.31% | 2 | 25.00%
Eric W. Biedermann | 33 | 22.92% | 1 | 12.50%
Jiri Pirko | 31 | 21.53% | 3 | 37.50%
Wang Chen | 8 | 5.56% | 1 | 12.50%
Eric Dumazet | 1 | 0.69% | 1 | 12.50%
Total | 144 | 100.00% | 8 | 100.00%


static void packet_dev_mclist_delete(struct net_device *dev, struct packet_mclist **mlp) { struct packet_mclist *ml; while ((ml = *mlp) != NULL) { if (ml->ifindex == dev->ifindex) { packet_dev_mc(dev, ml, -1); *mlp = ml->next; kfree(ml); } else mlp = &ml->next; } }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Francesco Ruggeri | 44 | 57.14% | 1 | 33.33%
Linus Torvalds (pre-git) | 33 | 42.86% | 2 | 66.67%
Total | 77 | 100.00% | 3 | 100.00%


static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; struct net_device *dev; int err; rtnl_lock(); err = -ENODEV; dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); if (!dev) goto done; err = -EINVAL; if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; i = kmalloc(sizeof(*i), GFP_KERNEL); if (i == NULL) goto done; err = 0; for (ml = po->mclist; ml; ml = ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { ml->count++; /* Free the new element ... */ kfree(i); goto done; } } i->type = mreq->mr_type; i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; err = packet_dev_mc(dev, i, 1); if (err) { po->mclist = i->next; kfree(i); } done: rtnl_unlock(); return err; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 253 | 79.31% | 3 | 23.08%
Mathias Krause | 24 | 7.52% | 1 | 7.69%
Wang Chen | 21 | 6.58% | 1 | 7.69%
David S. Miller | 13 | 4.08% | 2 | 15.38%
Hideaki Yoshifuji / 吉藤英明 | 3 | 0.94% | 1 | 7.69%
Eric W. Biedermann | 2 | 0.63% | 2 | 15.38%
Denis V. Lunev | 1 | 0.31% | 1 | 7.69%
Jiri Pirko | 1 | 0.31% | 1 | 7.69%
Arnaldo Carvalho de Melo | 1 | 0.31% | 1 | 7.69%
Total | 319 | 100.00% | 13 | 100.00%
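
packet_mc_add() is the backend of PACKET_ADD_MEMBERSHIP, which is also the preferred, reference-counted way to enter promiscuous mode (rather than toggling IFF_PROMISC by hand). A userspace sketch (helper name illustrative):

#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static int enable_promiscuous(int fd, int ifindex)
{
	struct packet_mreq mreq;

	memset(&mreq, 0, sizeof(mreq));
	mreq.mr_ifindex = ifindex;
	mreq.mr_type	= PACKET_MR_PROMISC;	/* maps to dev_set_promiscuity() */

	return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
			  &mreq, sizeof(mreq));
}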


static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; rtnl_lock(); for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev) packet_dev_mc(dev, ml, -1); kfree(ml); } break; } } rtnl_unlock(); return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 153 | 92.17% | 2 | 20.00%
David S. Miller | 4 | 2.41% | 2 | 20.00%
Hideaki Yoshifuji / 吉藤英明 | 3 | 1.81% | 1 | 10.00%
Francesco Ruggeri | 2 | 1.20% | 1 | 10.00%
Eric W. Biedermann | 2 | 1.20% | 2 | 20.00%
Eric Dumazet | 1 | 0.60% | 1 | 10.00%
Denis V. Lunev | 1 | 0.60% | 1 | 10.00%
Total | 166 | 100.00% | 10 | 100.00%


static void packet_flush_mclist(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml; if (!po->mclist) return; rtnl_lock(); while ((ml = po->mclist) != NULL) { struct net_device *dev; po->mclist = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev != NULL) packet_dev_mc(dev, ml, -1); kfree(ml); } rtnl_unlock(); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 77 | 75.49% | 2 | 20.00%
David S. Miller | 14 | 13.73% | 2 | 20.00%
Eric Dumazet | 5 | 4.90% | 2 | 20.00%
Hideaki Yoshifuji / 吉藤英明 | 3 | 2.94% | 1 | 10.00%
Arnaldo Carvalho de Melo | 1 | 0.98% | 1 | 10.00%
Eric W. Biedermann | 1 | 0.98% | 1 | 10.00%
Denis V. Lunev | 1 | 0.98% | 1 | 10.00%
Total | 102 | 100.00% | 10 | 100.00%


static int packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); int ret; if (level != SOL_PACKET) return -ENOPROTOOPT; switch (optname) { case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { struct packet_mreq_max mreq; int len = optlen; memset(&mreq, 0, sizeof(mreq)); if (len < sizeof(struct packet_mreq)) return -EINVAL; if (len > sizeof(mreq)) len = sizeof(mreq); if (copy_from_user(&mreq, optval, len)) return -EFAULT; if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else ret = packet_mc_drop(sk, &mreq); return ret; } case PACKET_RX_RING: case PACKET_TX_RING: { union tpacket_req_u req_u; int len; switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: len = sizeof(req_u.req); break; case TPACKET_V3: default: len = sizeof(req_u.req3); break; } if (optlen < len) return -EINVAL; if (copy_from_user(&req_u.req, optval, len)) return -EFAULT; return packet_set_ring(sk, &req_u, 0, optname == PACKET_TX_RING); } case PACKET_COPY_THRESH: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; pkt_sk(sk)->copy_thresh = val; return 0; } case PACKET_VERSION: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: break; default: return -EINVAL; } lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_version = val; ret = 0; } release_sock(sk); return ret; } case PACKET_RESERVE: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_reserve = val; ret = 0; } release_sock(sk); return ret; } case PACKET_LOSS: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->tp_loss = !!val; return 0; } case PACKET_AUXDATA: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->auxdata = !!val; return 0; } case PACKET_ORIGDEV: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->origdev = !!val; return 0; } case PACKET_VNET_HDR: { int val; if (sock->type != SOCK_RAW) return -EINVAL; if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) return -EBUSY; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->has_vnet_hdr = !!val; return 0; } case PACKET_TIMESTAMP: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->tp_tstamp = val; return 0; } case PACKET_FANOUT: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; return fanout_add(sk, val & 0xffff, val >> 16); } case PACKET_FANOUT_DATA: { if (!po->fanout) return -EINVAL; return fanout_set_data(po, optval, optlen); } case PACKET_TX_HAS_OFF: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->tp_tx_has_off = !!val; return 0; } case PACKET_QDISC_BYPASS: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->xmit = val ? packet_direct_xmit : dev_queue_xmit; return 0; } default: return -ENOPROTOOPT; } }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 198 | 18.75% | 2 | 9.52%
Patrick McHardy | 109 | 10.32% | 2 | 9.52%
Sridhar Samudrala | 81 | 7.67% | 1 | 4.76%
Johann Baudy | 74 | 7.01% | 1 | 4.76%
Chetan Loke | 70 | 6.63% | 1 | 4.76%
Paul Chavent | 70 | 6.63% | 1 | 4.76%
Willem de Bruijn | 65 | 6.16% | 2 | 9.52%
Herbert Xu | 61 | 5.78% | 1 | 4.76%
Eric W. Biedermann | 61 | 5.78% | 1 | 4.76%
David S. Miller | 58 | 5.49% | 3 | 14.29%
Daniel Borkmann | 53 | 5.02% | 1 | 4.76%
Peter P. Waskiewicz Jr | 51 | 4.83% | 1 | 4.76%
Scott McMillan | 49 | 4.64% | 1 | 4.76%
Philip Pettersson | 45 | 4.26% | 1 | 4.76%
Andrey Konovalov | 10 | 0.95% | 1 | 4.76%
Al Viro | 1 | 0.09% | 1 | 4.76%
Total | 1056 | 100.00% | 21 | 100.00%
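
Of the options handled above, PACKET_QDISC_BYPASS is the one that swaps the socket's transmit function between dev_queue_xmit and packet_direct_xmit. Enabling it from userspace is a one-line sketch (helper name illustrative):

#include <sys/socket.h>
#include <linux/if_packet.h>

static int enable_qdisc_bypass(int fd)
{
	int one = 1;

	/* frames sent on fd now skip the traffic-control (qdisc) layer */
	return setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS,
			  &one, sizeof(one));
}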


static int packet_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { int len; int val, lv = sizeof(val); struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; if (level != SOL_PACKET) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case PACKET_STATISTICS: spin_lock_bh(&sk->sk_receive_queue.lock); memcpy(&st, &po->stats, sizeof(st)); memset(&po->stats, 0, sizeof(po->stats)); spin_unlock_bh(&sk->sk_receive_queue.lock); if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); st.stats3.tp_packets += st.stats3.tp_drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); st.stats1.tp_packets += st.stats1.tp_drops; data = &st.stats1; } break; case PACKET_AUXDATA: val = po->auxdata; break; case PACKET_ORIGDEV: val = po->origdev; break; case PACKET_VNET_HDR: val = po->has_vnet_hdr; break; case PACKET_VERSION: val = po->tp_version; break; case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); if (len < sizeof(int)) return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { case TPACKET_V1: val = sizeof(struct tpacket_hdr); break; case TPACKET_V2: val = sizeof(struct tpacket2_hdr); break; case TPACKET_V3: val = sizeof(struct tpacket3_hdr); break; default: return -EINVAL; } break; case PACKET_RESERVE: val = po->tp_reserve; break; case PACKET_LOSS: val = po->tp_loss; break; case PACKET_TIMESTAMP: val = po->tp_tstamp; break; case PACKET_FANOUT: val = (po->fanout ? ((u32)po->fanout->id | ((u32)po->fanout->type << 16) | ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) return -EINVAL; rstats.tp_all = atomic_long_read(&po->rollover->num); rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); data = &rstats; lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; break; case PACKET_QDISC_BYPASS: val = packet_use_direct_xmit(po); break; default: return -ENOPROTOOPT; } if (len > lv) len = lv; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, data, len)) return -EFAULT; return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Linus Torvalds (pre-git) | 101 | 17.12% | 1 | 3.85%
Willem de Bruijn | 94 | 15.93% | 3 | 11.54%
Patrick McHardy | 91 | 15.42% | 2 | 7.69%
Daniel Borkmann | 52 | 8.81% | 2 | 7.69%
David S. Miller | 47 | 7.97% | 2 | 7.69%
Chetan Loke | 47 | 7.97% | 1 | 3.85%
Eric Dumazet | 35 | 5.93% | 2 | 7.69%
Herbert Xu | 32 | 5.42% | 1 | 3.85%
Alexander Potapenko | 13 | 2.20% | 1 | 3.85%
Mike Maloney | 13 | 2.20% | 1 | 3.85%
Peter P. Waskiewicz Jr | 10 | 1.69% | 1 | 3.85%
Paul Chavent | 10 | 1.69% | 1 | 3.85%
Linus Torvalds | 10 | 1.69% | 1 | 3.85%
Sridhar Samudrala | 10 | 1.69% | 1 | 3.85%
Johann Baudy | 10 | 1.69% | 1 | 3.85%
Scott McMillan | 10 | 1.69% | 1 | 3.85%
Arnaldo Carvalho de Melo | 2 | 0.34% | 2 | 7.69%
Al Viro | 2 | 0.34% | 1 | 3.85%
Adrian Bunk | 1 | 0.17% | 1 | 3.85%
Total | 590 | 100.00% | 26 | 100.00%
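
As the PACKET_STATISTICS branch shows, the counters are cleared on read and tp_packets already includes tp_drops. A polling sketch for the TPACKET_V1/V2 layout (helper name illustrative):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static void print_packet_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("received %u, dropped %u\n", st.tp_packets, st.tp_drops);
}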

#ifdef CONFIG_COMPAT
static int compat_packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct packet_sock *po = pkt_sk(sock->sk); if (level != SOL_PACKET) return -ENOPROTOOPT; if (optname == PACKET_FANOUT_DATA && po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { optval = (char __user *)get_compat_bpf_fprog(optval); if (!optval) return -EFAULT; optlen = sizeof(struct sock_fprog); } return packet_setsockopt(sock, level, optname, optval, optlen); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Willem de Bruijn | 111 | 100.00% | 1 | 100.00%
Total | 111 | 100.00% | 1 | 100.00%

#endif
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { struct sock *sk; struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) packet_dev_mclist_delete(dev, &po->mclist); /* fallthrough */ case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (po->running) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); } break; case NETDEV_UP: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (po->num) register_prot_hook(sk); spin_unlock(&po->bind_lock); } break; } } rcu_read_unlock(); return NOTIFY_DONE; }

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)         157  61.57%        1       5.00%
Stephen Hemminger                 17   6.67%        1       5.00%
Ben Greear                        17   6.67%        1       5.00%
Jason Lunz                        16   6.27%        1       5.00%
David S. Miller                   13   5.10%        4      20.00%
Arnaldo Carvalho de Melo          10   3.92%        4      20.00%
Denis V. Lunev                     7   2.75%        2      10.00%
Daniel Borkmann                    5   1.96%        1       5.00%
Jiri Pirko                         5   1.96%        1       5.00%
Hideaki Yoshifuji / 吉藤英明        3   1.18%        1       5.00%
Francesco Ruggeri                  2   0.78%        1       5.00%
James Morris                       2   0.78%        1       5.00%
Eric W. Biedermann                 1   0.39%        1       5.00%
Total                            255 100.00%       20     100.00%
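
For illustration only: packet_notifier() is hooked up through packet_netdev_notifier further below, and any module can watch the same chain. A minimal, hypothetical module sketch using the same notifier API (the demo_* names are invented for this example):

#include <linux/module.h>
#include <linux/netdevice.h>

static int demo_notify(struct notifier_block *nb,
		       unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	/* Same event af_packet reacts to when it unbinds sockets */
	if (event == NETDEV_DOWN)
		pr_info("demo: %s went down\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_notify,
};

static int __init demo_init(void)
{
	return register_netdevice_notifier(&demo_nb);
}

static void __exit demo_exit(void)
{
	unregister_netdevice_notifier(&demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");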


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)         198  86.46%        2      22.22%
Eric Dumazet                      18   7.86%        2      22.22%
Andi Kleen                         5   2.18%        1      11.11%
Arnaldo Carvalho de Melo           3   1.31%        1      11.11%
Al Viro                            2   0.87%        1      11.11%
Christoph Hellwig                  2   0.87%        1      11.11%
Linus Torvalds                     1   0.44%        1      11.11%
Total                            229 100.00%        9     100.00%
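
Userspace reaches the SIOCINQ/SIOCOUTQ branches above via ioctl(2). A small hypothetical sketch, not from this file: SIOCINQ reports the length of the first queued packet, SIOCOUTQ the bytes not yet sent:

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/sockios.h>
#include <linux/if_ether.h>

int main(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	int inq = 0, outq = 0;

	if (fd < 0)
		return perror("socket"), 1;
	/* SIOCINQ: first queued packet's length; SIOCOUTQ: unsent bytes */
	if (ioctl(fd, SIOCINQ, &inq) || ioctl(fd, SIOCOUTQ, &outq))
		return perror("ioctl"), 1;
	printf("inq=%d outq=%d\n", inq, outq);
	return 0;
}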


static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_rx_frame(po, &po->rx_ring,
			TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
		po->pressure = 0;
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)          87  48.88%        1       9.09%
Johann Baudy                      56  31.46%        1       9.09%
Willem de Bruijn                  21  11.80%        2      18.18%
Ulisses Alonso Camaró              4   2.25%        1       9.09%
Patrick McHardy                    3   1.69%        1       9.09%
David S. Miller                    3   1.69%        1       9.09%
Arnaldo Carvalho de Melo           2   1.12%        2      18.18%
Chetan Loke                        1   0.56%        1       9.09%
Adrian Bunk                        1   0.56%        1       9.09%
Total                            178 100.00%       11     100.00%
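
A hedged usage sketch for this poll path (hypothetical, not from this file): without a mapped ring, readiness comes from datagram_poll() alone; with an RX ring, packet_poll() also reports POLLIN when the previous ring frame is user-owned:

#include <stdio.h>
#include <poll.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>

int main(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	if (fd < 0)
		return perror("socket"), 1;
	/* Blocks up to one second waiting for queued skbs (or a ring frame) */
	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN))
		puts("readable");
	return 0;
}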

/* Dirty? Well, I still did not learn better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)          47  88.68%        1      33.33%
Eric Dumazet                       3   5.66%        1      33.33%
David S. Miller                    3   5.66%        1      33.33%
Total                             53 100.00%        3     100.00%


static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)          47  88.68%        1      33.33%
Eric Dumazet                       3   5.66%        1      33.33%
David S. Miller                    3   5.66%        1      33.33%
Total                             53 100.00%        3     100.00%

static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
};
static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)          48  46.15%        1      20.00%
Neil Horman                       36  34.62%        1      20.00%
David S. Miller                   15  14.42%        1      20.00%
Changli Gao                        4   3.85%        1      20.00%
Dave Hansen                        1   0.96%        1      20.00%
Total                            104 100.00%        5     100.00%


static char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/* __get_free_pages failed, fall back to vmalloc */
	buffer = vzalloc((1 << order) * PAGE_SIZE);
	if (buffer)
		return buffer;

	/* vmalloc failed, lets dig into swap here */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *) __get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/* complete and utter failure */
	return NULL;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Neil Horman                       61  61.62%        1      20.00%
David S. Miller                   22  22.22%        1      20.00%
Eric Dumazet                      13  13.13%        2      40.00%
Daniel Borkmann                    3   3.03%        1      20.00%
Total                             99 100.00%        5     100.00%


static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
David S. Miller                   91  73.39%        1      33.33%
Linus Torvalds (pre-git)          21  16.94%        1      33.33%
Neil Horman                       12   9.68%        1      33.33%
Total                            124 100.00%        3     100.00%


static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err = -EINVAL;
	/* Added to avoid minimal code churn */
	struct tpacket_req *req = &req_u->req;

	lock_sock(sk);

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (packet_read_pending(rb))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		case TPACKET_V3:
			po->tp_hdrlen = TPACKET3_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
			goto out;
		if (po->tp_version >= TPACKET_V3 &&
		    req->tp_block_size <=
			  BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
		if (unlikely(rb->frames_per_block == 0))
			goto out;
		if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
		switch (po->tp_version) {
		case TPACKET_V3:
			/* Block transmit is not supported yet */
			if (!tx_ring) {
				init_prb_bdqc(po, rb, pg_vec, req_u);
			} else {
				struct tpacket_req3 *req3 = &req_u->req3;

				if (req3->tp_retire_blk_tov ||
				    req3->tp_sizeof_priv ||
				    req3->tp_feature_req_word) {
					err = -EINVAL;
					goto out;
				}
			}
			break;
		default:
			break;
		}
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		po->num = 0;
		__unregister_prot_hook(sk, false);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running) {
		po->num = num;
		register_prot_hook(sk);
	}
	spin_unlock(&po->bind_lock);
	if (pg_vec && (po->tp_version > TPACKET_V2)) {
		/* Because we don't support block-based V3 on tx-ring */
		if (!tx_ring)
			prb_shutdown_retire_blk_timer(po, rb_queue);
	}

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	release_sock(sk);
	return err;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)         335  41.56%        2       7.41%
Johann Baudy                     132  16.38%        1       3.70%
Chetan Loke                       83  10.30%        1       3.70%
David S. Miller                   45   5.58%        3      11.11%
Sowmini Varadhan                  40   4.96%        1       3.70%
Patrick McHardy                   35   4.34%        2       7.41%
Hirofumi Ogawa                    27   3.35%        1       3.70%
Eric Dumazet                      24   2.98%        2       7.41%
Andrey Konovalov                  22   2.73%        2       7.41%
Herbert Xu                        16   1.99%        1       3.70%
Stephen Hemminger                 11   1.36%        1       3.70%
Philip Pettersson                 10   1.24%        1       3.70%
Ulisses Alonso Camaró             10   1.24%        1       3.70%
Tobias Klauser                     5   0.62%        2       7.41%
Changli Gao                        3   0.37%        1       3.70%
Al Viro                            3   0.37%        1       3.70%
Neil Horman                        2   0.25%        1       3.70%
Daniel Borkmann                    1   0.12%        1       3.70%
Arnaldo Carvalho de Melo           1   0.12%        1       3.70%
Américo Wang                       1   0.12%        1       3.70%
Total                            806 100.00%       27     100.00%
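
To make the sanity checks above concrete, here is a hypothetical userspace request that satisfies them, assuming 4 KiB pages and the default TPACKET_V1: tp_block_size is page-aligned, tp_frame_size is TPACKET_ALIGNMENT-aligned and large enough for the header, and tp_frame_nr equals frames_per_block * tp_block_nr, otherwise packet_set_ring() returns -EINVAL:

#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
	/* 8 blocks of 4 KiB holding two 2 KiB frames each: 16 frames total */
	struct tpacket_req req = {
		.tp_block_size = 4096,
		.tp_block_nr   = 8,
		.tp_frame_size = 2048,
		.tp_frame_nr   = 16,
	};
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0)
		return perror("socket"), 1;
	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0)
		return perror("PACKET_RX_RING"), 1;
	puts("rx ring allocated");
	return 0;
}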


static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)         141  44.90%        3      30.00%
Johann Baudy                      96  30.57%        1      10.00%
David S. Miller                   49  15.61%        2      20.00%
Neil Horman                       16   5.10%        1      10.00%
Herbert Xu                        10   3.18%        1      10.00%
Arnaldo Carvalho de Melo           1   0.32%        1      10.00%
Changli Gao                        1   0.32%        1      10.00%
Total                            314 100.00%       10     100.00%
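
Continuing the ring sketch: packet_mmap() requires vm_pgoff == 0 and a mapping length equal to the full ring. A hypothetical reader for one TPACKET_V1 frame follows; with the sizes chosen above, frames fill blocks exactly, so frame i sits at ring + i * tp_frame_size:

#include <stdio.h>
#include <poll.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
	struct tpacket_req req = {
		.tp_block_size = 4096,
		.tp_block_nr   = 8,
		.tp_frame_size = 2048,
		.tp_frame_nr   = 16,
	};
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	volatile struct tpacket_hdr *hdr;
	unsigned char *ring;
	struct pollfd pfd;

	if (fd < 0 ||
	    setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0)
		return perror("ring setup"), 1;

	/* Offset must be 0 and length must cover the whole ring */
	ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED)
		return perror("mmap"), 1;

	/* Wait until the kernel hands frame 0 to userspace */
	hdr = (struct tpacket_hdr *)ring;
	pfd.fd = fd;
	pfd.events = POLLIN;
	while (!(hdr->tp_status & TP_STATUS_USER))
		poll(&pfd, 1, -1);

	printf("captured %u bytes\n", hdr->tp_snaplen);
	hdr->tp_status = TP_STATUS_KERNEL;	/* hand the slot back */
	return 0;
}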

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_packet_setsockopt,
#endif
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS
static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Stephen Hemminger                 25  53.19%        2      25.00%
Denis V. Lunev                    12  25.53%        2      25.00%
Li Zefan                           5  10.64%        1      12.50%
Eric Dumazet                       3   6.38%        1      12.50%
Arnaldo Carvalho de Melo           1   2.13%        1      12.50%
Linus Torvalds (pre-git)           1   2.13%        1      12.50%
Total                             47 100.00%        8     100.00%


static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Stephen Hemminger                 26  57.78%        2      33.33%
Denis V. Lunev                    12  26.67%        2      33.33%
Li Zefan                           4   8.89%        1      16.67%
Herbert Xu                         3   6.67%        1      16.67%
Total                             45 100.00%        6     100.00%


static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Stephen Hemminger                 19  86.36%        2      66.67%
Eric Dumazet                       3  13.64%        1      33.33%
Total                             22 100.00%        3     100.00%


static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   refcount_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
			   sock_i_ino(s));
	}

	return 0;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)          47  40.17%        2      16.67%
Stephen Hemminger                 39  33.33%        1       8.33%
David S. Miller                   12  10.26%        2      16.67%
Eric W. Biedermann                 8   6.84%        1       8.33%
Arnaldo Carvalho de Melo           6   5.13%        3      25.00%
Li Zefan                           3   2.56%        1       8.33%
Dan Rosenberg                      1   0.85%        1       8.33%
Elena Reshetova                    1   0.85%        1       8.33%
Total                            117 100.00%       12     100.00%
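
The seq_file above backs /proc/net/packet. A trivial, hypothetical userspace reader (not from this file) dumps the same table that packet_seq_show() renders:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/net/packet", "r");

	if (!f)
		return perror("fopen"), 1;
	/* First line is the header emitted for SEQ_START_TOKEN */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}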

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Stephen Hemminger                 20  60.61%        1      25.00%
Denis V. Lunev                    11  33.33%        2      50.00%
Linus Torvalds (pre-git)           2   6.06%        1      25.00%
Total                             33 100.00%        4     100.00%

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
#endif
static int __net_init packet_net_init(struct net *net)
{
	mutex_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Denis V. Lunev                    47  83.93%        2      28.57%
Gao Feng                           5   8.93%        1      14.29%
Linus Torvalds (pre-git)           2   3.57%        2      28.57%
Alexey Dobriyan                    1   1.79%        1      14.29%
Pavel Emelyanov                    1   1.79%        1      14.29%
Total                             56 100.00%        7     100.00%


static void __net_exit packet_net_exit(struct net *net)
{
	remove_proc_entry("packet", net->proc_net);
	WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Vasily Averin                     14  40.00%        1      14.29%
Denis V. Lunev                     8  22.86%        1      14.29%
Linus Torvalds (pre-git)           6  17.14%        2      28.57%
Gao Feng                           5  14.29%        1      14.29%
Alexey Dobriyan                    1   2.86%        1      14.29%
Stephen Hemminger                  1   2.86%        1      14.29%
Total                             35 100.00%        7     100.00%

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Denis V. Lunev                    14  43.75%        1      25.00%
Linus Torvalds (pre-git)          12  37.50%        2      50.00%
Arnaldo Carvalho de Melo           6  18.75%        1      25.00%
Total                             32 100.00%        4     100.00%


static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)          24  46.15%        5      62.50%
Arnaldo Carvalho de Melo          23  44.23%        1      12.50%
Denis V. Lunev                     4   7.69%        1      12.50%
Eric W. Biedermann                 1   1.92%        1      12.50%
Total                             52 100.00%        8     100.00%

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);

Overall Contributors

Person                        Tokens    Prop  Commits  CommitProp
Linus Torvalds (pre-git)        4454  21.16%       26       7.78%
Willem de Bruijn                3498  16.62%       28       8.38%
Chetan Loke                     2915  13.85%        3       0.90%
David S. Miller                 1674   7.95%       20       5.99%
Johann Baudy                    1517   7.21%        1       0.30%
Daniel Borkmann                 1201   5.70%       33       9.88%
Sridhar Samudrala                487   2.31%        1       0.30%
Eric Dumazet                     483   2.29%       32       9.58%
Patrick McHardy                  479   2.28%        4       1.20%
Herbert Xu                       332   1.58%        6       1.80%
Stephen Hemminger                258   1.23%        8       2.40%
Francesco Ruggeri                245   1.16%        2       0.60%
Ben Greear                       242   1.15%        5       1.50%
Mike Maloney                     209   0.99%        2       0.60%
Eric W. Biedermann               200   0.95%       10       2.99%
Neil Horman                      185   0.88%        4       1.20%
Denis V. Lunev                   180   0.86%        3       0.90%
Arnaldo Carvalho de Melo         159   0.76%       10       2.99%
Sowmini Varadhan                 139   0.66%        1       0.30%
Atzm Watanabe                    137   0.65%        3       0.90%
Soheil Hassas Yeganeh            104   0.49%        1       0.30%
Alexander Drozdov                 99   0.47%        5       1.50%
Peter P. Waskiewicz Jr            97   0.46%        1       0.30%
Rusty Russell                     83   0.39%        2       0.60%
Paul Chavent                      80   0.38%        1       0.30%
Eyal Birger                       72   0.34%        2       0.60%
Changli Gao                       72   0.34%        4       1.20%
Jiri Pirko                        68   0.32%        6       1.80%
Anoob Soman                       65   0.31%        1       0.30%
Linus Torvalds                    65   0.31%        6       1.80%
Scott McMillan                    62   0.29%        1       0.30%
Hannes Frederic Sowa              58   0.28%        1       0.30%
Richard Cochran                   57   0.27%        2       0.60%
Philip Pettersson                 55   0.26%        1       0.30%
Pavel Emelyanov                   54   0.26%        4       1.20%
Mitchell Blank Jr.                45   0.21%        1       0.30%
Hideaki Yoshifuji / 吉藤英明       44   0.21%        4       1.20%
Phil Sutter                       44   0.21%        3       0.90%
Douglas Caetano dos Santos        40   0.19%        1       0.30%
Weongyo Jeong                     40   0.19%        1       0.30%
Alexander Potapenko               38   0.18%        2       0.60%
Al Viro                           35   0.17%        8       2.40%
Eric Leblond                      33   0.16%        1       0.30%
Kees Cook                         32   0.15%        1       0.30%
Andrey Konovalov                  32   0.15%        3       0.90%
Steffen Hurrle                    30   0.14%        1       0.30%
Jarek Poplawski                   29   0.14%        1       0.30%
Wang Chen                         29   0.14%        1       0.30%
Dan Collins                       28   0.13%        1       0.30%
Hirofumi Ogawa                    27   0.13%        1       0.30%
Mathias Krause                    24   0.11%        1       0.30%
Ulisses Alonso Camaró             24   0.11%        1       0.30%
Iván Briano                       22   0.10%        1       0.30%
Benjamin Poirier                  22   0.10%        1       0.30%
Jason (Hui) Wang                  20   0.10%        3       0.90%
Mathias Kretschmer                18   0.09%        1       0.30%
Michael S. Tsirkin                18   0.09%        2       0.60%
Elena Reshetova                   17   0.08%        3       0.90%
Jarno Rajahalme                   16   0.08%        2       0.60%
Jason Lunz                        16   0.08%        1       0.30%
Parav Pandit                      15   0.07%        1       0.30%
David Decotigny                   14   0.07%        1       0.30%
Vasily Averin                     14   0.07%        1       0.30%
Duan Jiong                        12   0.06%        1       0.30%
Phil Oester                       12   0.06%        1       0.30%
Li Zefan                          12   0.06%        1       0.30%
Yoshihiro Shimoda                 11   0.05%        1       0.30%
Octavian Purdila                  10   0.05%        1       0.30%
Gao Feng                          10   0.05%        2       0.60%
Lars Westerhoff                    9   0.04%        1       0.30%
Oliver Hartkopp                    8   0.04%        1       0.30%
Dave Jones                         7   0.03%        1       0.30%
Adrian Bunk                        7   0.03%        1       0.30%
Dave Hansen                        7   0.03%        2       0.60%
Alexey Kuznetsov                   7   0.03%        1       0.30%
Cyrill V. Gorcunov                 7   0.03%        1       0.30%
Edward Hyunkoo Jee                 7   0.03%        1       0.30%
James Morris                       6   0.03%        1       0.30%
Vasiliy Kulikov                    6   0.03%        1       0.30%
Dmitry Mishin                      5   0.02%        1       0.30%
Benjamin Collins                   5   0.02%        1       0.30%
Tobias Klauser                     5   0.02%        2       0.60%
Andi Kleen                         5   0.02%        1       0.30%
David Woodhouse                    5   0.02%        1       0.30%
Urs Thuermann                      4   0.02%        1       0.30%
Alexei Starovoitov                 4   0.02%        1       0.30%
Tejun Heo                          3   0.01%        1       0.30%
Alexey Dobriyan                    3   0.01%        2       0.60%
Eric Paris                         3   0.01%        1       0.30%
Tom Herbert                        3   0.01%        2       0.60%
Randy Dunlap                       3   0.01%        1       0.30%
Veaceslav Falico                   2   0.01%        1       0.30%
Christoph Hellwig                  2   0.01%        1       0.30%
Gabor Gombas                       2   0.01%        1       0.30%
Philippe De Muyter                 1   0.00%        1       0.30%
Américo Wang                       1   0.00%        1       0.30%
Fengguang Wu                       1   0.00%        1       0.30%
Li Zhong                           1   0.00%        1       0.30%
Dan Rosenberg                      1   0.00%        1       0.30%
Christoph Jaeger                   1   0.00%        1       0.30%
Ying Xue                           1   0.00%        1       0.30%
Ralf Bächle                        1   0.00%        1       0.30%
Arjan van de Ven                   1   0.00%        1       0.30%
Total                          21052 100.00%      334     100.00%
Created with cregit.