cregit-Linux how code gets into the kernel

Release 4.14 net/packet/af_packet.c

Directory: net/packet
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *              PACKET - implements raw packet sockets.
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <>
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 *                                      The convention is that longer addresses
 *                                      will simply extend the hardware address
 *                                      byte arrays at the end of sockaddr_ll
 *                                      and packet_mreq.
 *              Johann Baudy    :       Added TX RING.
 *              Chetan Loke     :       Implemented TPACKET_V3 block abstraction
 *                                      layer.
 *                                      Copyright (C) 2011, <>
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/percpu.h>
#include <net/inet_common.h>
#include <linux/bpf.h>
#include <net/compat.h>

#include "internal.h"

   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are enough clever to reallocate skb, when header
     will not fit to reserved space (tunnel), another ones are silly
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely, that it points to ll
                 header.  PPP makes it, that is wrong, because introduce
                 assymetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

  If dev->hard_header==NULL we are unlikely to restore sensible ll header.

On transmit:

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to correct posistion,
   packet classifier depends on it.

/* Private packet socket structures. */

/* identical to struct packet_mreq except it has
 * a longer address field.

struct packet_mreq_max {
int		mr_ifindex;
unsigned short	mr_type;
unsigned short	mr_alen;
unsigned char	mr_address[MAX_ADDR_LEN];

union tpacket_uhdr {
struct tpacket_hdr  *h1;
struct tpacket2_hdr *h2;
struct tpacket3_hdr *h3;
void *raw;

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);

#define V3_ALIGNMENT	(8)

#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

#define BLOCK_STATUS(x)	((x)->hdr.bh1.block_status)

#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)

#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)

#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)

#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)

#define BLOCK_O2PRIV(x)	((x)->offset_to_priv)

#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))

struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
			struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(unsigned long);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_init_blk_timer(struct packet_sock *,
		struct tpacket_kbdq_core *,
		void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb);

struct packet_skb_cb {
	union {
struct sockaddr_pkt pkt;
		union {
			/* Trick: alias skb original length with
                         * ll.sll_family and ll.protocol in order
                         * to save room.
unsigned int origlen;
struct sockaddr_ll ll;
} sa;

#define vio_le() virtio_legacy_is_little_endian()

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))

#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))

	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))

#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
        ((x)->kactive_blk_num+1) : 0)

static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);

static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop; skb = validate_xmit_skb_list(skb, dev); if (skb != orig_skb) goto drop; packet_pick_tx_queue(dev, skb); txq = skb_get_tx_queue(dev, skb); local_bh_disable(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); if (!dev_xmit_complete(ret)) kfree_skb(skb); return ret; drop: atomic_long_inc(&dev->tx_dropped); kfree_skb_list(skb); return NET_XMIT_DROP; }


Daniel Borkmann14585.80%550.00%
Willem de Bruijn137.69%110.00%
Iván Briano74.14%110.00%
David S. Miller42.37%330.00%

static struct net_device *packet_cached_dev_get(struct packet_sock *po) { struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(po->cached_dev); if (likely(dev)) dev_hold(dev); rcu_read_unlock(); return dev; }


Daniel Borkmann48100.00%1100.00%

static void packet_cached_dev_assign(struct packet_sock *po, struct net_device *dev) { rcu_assign_pointer(po->cached_dev, dev); }


Daniel Borkmann25100.00%1100.00%

static void packet_cached_dev_reset(struct packet_sock *po) { RCU_INIT_POINTER(po->cached_dev, NULL); }


Daniel Borkmann20100.00%1100.00%

static bool packet_use_direct_xmit(const struct packet_sock *po) { return po->xmit == packet_direct_xmit; }


Daniel Borkmann19100.00%1100.00%

static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) { return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; }


Daniel Borkmann2696.30%266.67%
Li Zhong13.70%133.33%

static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) { const struct net_device_ops *ops = dev->netdev_ops; u16 queue_index; if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb, NULL, __packet_pick_tx_queue); queue_index = netdev_cap_txqueue(dev, queue_index); } else { queue_index = __packet_pick_tx_queue(dev, skb); } skb_set_queue_mapping(skb, queue_index); }


Daniel Borkmann80100.00%1100.00%

/* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */
static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); if (!po->running) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); po->running = 1; } }


David S. Miller63100.00%2100.00%

/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock * held. If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the * callers responsibility to take care of this. */
static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); po->running = 0; if (po->fanout) __fanout_unlink(sk, po); else __dev_remove_pack(&po->prot_hook); __sock_put(sk); if (sync) { spin_unlock(&po->bind_lock); synchronize_net(); spin_lock(&po->bind_lock); } }


David S. Miller82100.00%2100.00%

static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); if (po->running) __unregister_prot_hook(sk, sync); }


David S. Miller37100.00%1100.00%

static inline struct page * __pure pgv_to_page(void *addr) { if (is_vmalloc_addr(addr)) return vmalloc_to_page(addr); return virt_to_page(addr); }


Changli Gao2472.73%150.00%
Michael S. Tsirkin927.27%150.00%

static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union tpacket_uhdr h; h.raw = frame; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_status = status; flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: h.h2->tp_status = status; flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; case TPACKET_V3: h.h3->tp_status = status; flush_dcache_page(pgv_to_page(&h.h3->tp_status)); break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } smp_wmb(); }


Johann Baudy4334.40%110.00%
Patrick McHardy3931.20%110.00%
Sowmini Varadhan2217.60%110.00%
Chetan Loke75.60%110.00%
Ulisses Alonso Camaró54.00%110.00%
Daniel Borkmann32.40%110.00%
Linus Torvalds (pre-git)21.60%110.00%
Changli Gao21.60%110.00%
Arnaldo Carvalho de Melo10.80%110.00%
Michael S. Tsirkin10.80%110.00%

static int __packet_get_status(struct packet_sock *po, void *frame) { union tpacket_uhdr h; smp_rmb(); h.raw = frame; switch (po->tp_version) { case TPACKET_V1: flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return h.h1->tp_status; case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; case TPACKET_V3: flush_dcache_page(pgv_to_page(&h.h3->tp_status)); return h.h3->tp_status; default: WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } }


Patrick McHardy4235.29%116.67%
Johann Baudy3529.41%116.67%
Sowmini Varadhan2016.81%116.67%
Chetan Loke1714.29%116.67%
Daniel Borkmann32.52%116.67%
Changli Gao21.68%116.67%

static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, unsigned int flags) { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (shhwtstamps && (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) return TP_STATUS_TS_RAW_HARDWARE; if (ktime_to_timespec_cond(skb->tstamp, ts)) return TP_STATUS_TS_SOFTWARE; return 0; }


Daniel Borkmann69100.00%2100.00%

static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct sk_buff *skb) { union tpacket_uhdr h; struct timespec ts; __u32 ts_status; if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) return 0; h.raw = frame; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; break; case TPACKET_V2: h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; break; case TPACKET_V3: h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } /* one flush is safe, as both fields always lie on the same cacheline */ flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); smp_wmb(); return ts_status; }


Willem de Bruijn12975.00%125.00%
Daniel Borkmann4325.00%375.00%

static void *packet_lookup_frame(struct packet_sock *po, struct packet_ring_buffer *rb, unsigned int position, int status) { unsigned int pg_vec_pos, frame_offset; union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; h.raw = rb->pg_vec[pg_vec_pos].buffer + (frame_offset * rb->frame_size); if (status != __packet_get_status(po, h.raw)) return NULL; return h.raw; }


Chetan Loke8996.74%150.00%
Daniel Borkmann33.26%150.00%

static void *packet_current_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { return packet_lookup_frame(po, rb, rb->head, status); }


Chetan Loke34100.00%1100.00%

static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) { del_timer_sync(&pkc->retire_blk_timer); }


Chetan Loke19100.00%2100.00%

static void prb_shutdown_retire_blk_timer(struct packet_sock *po, struct sk_buff_head *rb_queue) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); }


Chetan Loke5391.38%250.00%
Duan Jiong35.17%125.00%
Veaceslav Falico23.45%125.00%

static void prb_init_blk_timer(struct packet_sock *po, struct tpacket_kbdq_core *pkc, void (*func) (unsigned long)) { init_timer(&pkc->retire_blk_timer); pkc-> = (long)po; pkc->retire_blk_timer.function = func; pkc->retire_blk_timer.expires = jiffies; }


Chetan Loke61100.00%2100.00%

static void prb_setup_retire_blk_timer(struct packet_sock *po) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); }


Chetan Loke3291.43%266.67%
Duan Jiong38.57%133.33%

static int prb_calc_retire_blk_tmo(struct packet_sock *po, int blk_size_in_bytes) { struct net_device *dev; unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; struct ethtool_link_ksettings ecmd; int err; rtnl_lock(); dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); if (unlikely(!dev)) { rtnl_unlock(); return DEFAULT_PRB_RETIRE_TOV; } err = __ethtool_get_link_ksettings(dev, &ecmd); rtnl_unlock(); if (!err) { /* * If the link speed is so slow you don't really * need to worry about perf anyways */ if (ecmd.base.speed < SPEED_1000 || ecmd.base.speed == SPEED_UNKNOWN) { return DEFAULT_PRB_RETIRE_TOV; } else { msec = 1; div = ecmd.base.speed / 1000; } } mbits = (blk_size_in_bytes * 8) / (1024 * 1024); if (div) mbits /= div; tmo = mbits * msec; if (div) return tmo+1; return tmo; }


Chetan Loke12770.17%125.00%
Jiri Pirko2513.81%125.00%
Parav Pandit158.29%125.00%
David Decotigny147.73%125.00%

static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, union tpacket_req_u *req_u) { p1->feature_req_word = req_u->req3.tp_feature_req_word; }


Chetan Loke26100.00%2100.00%

static void init_prb_bdqc(struct packet_sock *po, struct packet_ring_buffer *rb, struct pgv *pg_vec, union tpacket_req_u *req_u) { struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); p1->knxt_seq_num = 1; p1->pkbdq = pg_vec; pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; p1->pkblk_start = pg_vec[0].buffer; p1->kblk_size = req_u->req3.tp_block_size; p1->knum_blocks = req_u->req3.tp_block_nr; p1->hdrlen = po->tp_hdrlen; p1->version = po->tp_version; p1->last_kactive_blk_num = 0; po->stats.stats3.tp_freeze_q_cnt = 0; if (req_u->req3.tp_retire_blk_tov) p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; else p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, req_u->req3.tp_block_size); p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po); prb_open_block(p1, pbd); }


Chetan Loke21391.81%240.00%
Eric Dumazet156.47%120.00%
Duan Jiong31.29%120.00%
Daniel Borkmann10.43%120.00%

/* Do NOT update the last_blk_num first. * Assumes sk_buff_head lock is held. */
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) { mod_timer(&pkc->retire_blk_timer, jiffies + pkc->tov_in_jiffies); pkc->last_kactive_blk_num = pkc->kactive_blk_num; }


Chetan Loke33100.00%2100.00%

/* * Timer logic: * 1) We refresh the timer only when we open a block. * By doing this we don't waste cycles refreshing the timer * on packet-by-packet basis. * * With a 1MB block-size, on a 1Gbps line, it will take * i) ~8 ms to fill a block + ii) memcpy etc. * In this cut we are not accounting for the memcpy time. * * So, if the user sets the 'tmo' to 10ms then the timer * will never fire while the block is still getting filled * (which is what we want). However, the user could choose * to close a block early and that's fine. * * But when the timer does fire, we check whether or not to refresh it. * Since the tmo granularity is in msecs, it is not too expensive * to refresh the timer, lets say every '8' msecs. * Either the user can set the 'tmo' or we can derive it based on * a) line-speed and b) block-size. * prb_calc_retire_blk_tmo() calculates the tmo. * */
static void prb_retire_rx_blk_timer_expired(unsigned long data) { struct packet_sock *po = (struct packet_sock *)data; struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; spin_lock(&po->sk.sk_receive_queue.lock); frozen = prb_queue_frozen(pkc); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); if (unlikely(pkc->delete_blk_timer)) goto out; /* We only need to plug the race when the block is partially filled. * tpacket_rcv: * lock(); increment BLOCK_NUM_PKTS; unlock() * copy_bits() is in progress ... * timer fires on other cpu: * we can't retire the current block because copy_bits * is in progress. * */ if (BLOCK_NUM_PKTS(pbd)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... */ cpu_relax(); } } if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { if (!BLOCK_NUM_PKTS(pbd)) { /* An empty block. Just refresh the timer. */ goto refresh_timer; } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; else goto out; } else { /* Case 1. Queue was frozen because user-space was * lagging behind. */ if (prb_curr_blk_in_use(pbd)) { /* * Ok, user-space is still behind. * So just refresh the timer. */ goto refresh_timer; } else { /* Case 2. queue was frozen,user-space caught up, * now the link went idle && the timer fired. * We don't have a block to close.So we open this * block and restart the timer. * opening a block thaws the queue,restarts timer * Thawing/timer-refresh is a side effect. */ prb_open_block(pkc, pbd); goto out; } } } refresh_timer: _prb_refresh_rx_retire_blk_timer(pkc); out: spin_unlock(&po->sk.sk_receive_queue.lock); }


Chetan Loke20292.24%250.00%
Alexander Drozdov146.39%125.00%
Duan Jiong31.37%125.00%

static void prb_flush_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, __u32 status) { /* Flush everything minus the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 u8 *start, *end; start = (u8 *)pbd1; /* Skip the block header(we know header WILL fit in 4K) */ start += PAGE_SIZE; end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); for (; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif /* Now update the block status. */ BLOCK_STATUS(pbd1) = status; /* Flush the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 start = (u8 *)pbd1; flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif }


Chetan Loke121100.00%2100.00%

/* * Side effect: * * 1) flush the block * 2) Increment active_blk_num * * Note:We DONT refresh the timer on purpose. * Because almost always the next block will be opened. */
static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, struct packet_sock *po, unsigned int stat) { __u32 status = TP_STATUS_USER | stat; struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; struct sock *sk = &po->sk; if (po->stats.stats3.tp_drops) status |= TP_STATUS_LOSING; last_pkt = (struct tpacket3_hdr *)pkc1->prev; last_pkt->tp_next_offset = 0; /* Get the ts of the last pkt */ if (BLOCK_NUM_PKTS(pbd1)) { h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; } else { /* Ok, we tmo'd - so get the current time. * * It shouldn't really happen as we don't close empty * blocks. See prb_retire_rx_blk_timer_expired(). */ struct timespec