cregit-Linux how code gets into the kernel

Release 4.8 net/packet/af_packet.c

Directory: net/packet
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 *                                      The convention is that longer addresses
 *                                      will simply extend the hardware address
 *                                      byte arrays at the end of sockaddr_ll
 *                                      and packet_mreq.
 *              Johann Baudy    :       Added TX RING.
 *              Chetan Loke     :       Implemented TPACKET_V3 block abstraction
 *                                      layer.
 *                                      Copyright (C) 2011, <lokec@ccs.neu.edu>
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/percpu.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <linux/bpf.h>
#include <net/compat.h>

#include "internal.h"

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     does not fit in the reserved space (tunnel); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
                 header.  PPP does this, which is wrong because it introduces
                 asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Resume
  If dev->hard_header==NULL we are unlikely to restore sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */

/* Private packet socket structures. */

/*
 * Identical to struct packet_mreq, except that the address field is
 * longer: it holds up to MAX_ADDR_LEN bytes.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};


/*
 * A single frame pointer that can be viewed as any of the three
 * tpacket header types, or as an untyped pointer through @raw.
 */
union tpacket_uhdr {
	struct tpacket_hdr  *h1;
	struct tpacket2_hdr *h2;
	struct tpacket3_hdr *h3;
	void *raw;
};

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);


/* Alignment, in bytes, used by the BLK_HDR_LEN / BLK_PLUS_PRIV size math. */
#define V3_ALIGNMENT	(8)


/* Size of struct tpacket_block_desc rounded up to V3_ALIGNMENT. */
#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))


/* Aligned block header plus an aligned private area of sz_of_priv bytes. */
#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))


/* NOTE(review): not used in the visible part of the file; presumably a
 * flag marking a pg_vec buffer that came from vmalloc -- confirm.
 */
#define PGV_FROM_VMALLOC 1


/* Field accessors for a block descriptor pointer x (x->hdr.bh1.*). */
#define BLOCK_STATUS(x)	((x)->hdr.bh1.block_status)

#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)

#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)

#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)

#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)

/* Byte offset from the start of the block descriptor to its private area. */
#define BLOCK_O2PRIV(x)	((x)->offset_to_priv)

/* Pointer to the private area: x advanced by BLOCK_O2PRIV(x) bytes. */
#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
			struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
			struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(unsigned long);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_init_blk_timer(struct packet_sock *,
		struct tpacket_kbdq_core *,
		void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);


/*
 * Layout of the data this file keeps in skb->cb; accessed through
 * PACKET_SKB_CB().
 */
struct packet_skb_cb {
	union {
		struct sockaddr_pkt pkt;
		union {
			/* Trick: alias skb original length with
			 * ll.sll_family and ll.protocol in order
			 * to save room.
			 */
			unsigned int origlen;
			struct sockaddr_ll ll;
		};
	} sa;
};


/* Shorthand for virtio_legacy_is_little_endian(). */
#define vio_le() virtio_legacy_is_little_endian()


/* View the cb[] area of an skb as a struct packet_skb_cb. */
#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))


/* Address of the prb_bdqc member of ring buffer x, as a tpacket_kbdq_core. */
#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))

/* Block descriptor stored in buffer number bid of x->pkbdq. */
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))

/* Block descriptor of the block indexed by x->kactive_blk_num. */
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))

/* Index following kactive_blk_num, wrapping to 0 after knum_blocks-1. */
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
        ((x)->kactive_blk_num+1) : 0)

static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);


static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop; features = netif_skb_features(skb); if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto drop; txq = skb_get_tx_queue(dev, skb); local_bh_disable(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); if (!dev_xmit_complete(ret)) kfree_skb(skb); return ret; drop: atomic_long_inc(&dev->tx_dropped); kfree_skb(skb); return NET_XMIT_DROP; }

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann16097.56%562.50%
david s. millerdavid s. miller42.44%337.50%
Total164100.00%8100.00%


/*
 * Read po->cached_dev under rcu_read_lock() and, if it is non-NULL,
 * take a reference on it with dev_hold().  Returns the device pointer
 * (possibly NULL).
 */
static struct net_device *packet_cached_dev_get(struct packet_sock *po)
{
	struct net_device *cached;

	rcu_read_lock();
	cached = rcu_dereference(po->cached_dev);
	if (likely(cached))
		dev_hold(cached);
	rcu_read_unlock();

	return cached;
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann48100.00%1100.00%
Total48100.00%1100.00%


/* Publish @dev as po->cached_dev using rcu_assign_pointer(). */
static void packet_cached_dev_assign(struct packet_sock *po,
				     struct net_device *dev)
{
	rcu_assign_pointer(po->cached_dev, dev);
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann25100.00%1100.00%
Total25100.00%1100.00%


/* Clear po->cached_dev (set it to NULL via RCU_INIT_POINTER()). */
static void packet_cached_dev_reset(struct packet_sock *po)
{
	RCU_INIT_POINTER(po->cached_dev, NULL);
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann20100.00%1100.00%
Total20100.00%1100.00%


/* True when the socket's xmit hook is packet_direct_xmit(). */
static bool packet_use_direct_xmit(const struct packet_sock *po)
{
	return po->xmit == packet_direct_xmit;
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann19100.00%1100.00%
Total19100.00%1100.00%


static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) { return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; }

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann2696.30%266.67%
li zhongli zhong13.70%133.33%
Total27100.00%3100.00%


/*
 * Choose a TX queue for @skb and record it with skb_set_queue_mapping().
 *
 * If the driver provides ndo_select_queue() it is asked first (with
 * __packet_pick_tx_queue as its fallback argument) and its answer is
 * passed through netdev_cap_txqueue(); otherwise
 * __packet_pick_tx_queue() is used directly.
 */
static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	u16 txq_idx;

	if (!ops->ndo_select_queue) {
		txq_idx = __packet_pick_tx_queue(dev, skb);
	} else {
		txq_idx = ops->ndo_select_queue(dev, skb, NULL,
						__packet_pick_tx_queue);
		txq_idx = netdev_cap_txqueue(dev, txq_idx);
	}

	skb_set_queue_mapping(skb, txq_idx);
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann80100.00%1100.00%
Total80100.00%1100.00%

/*
 * register_prot_hook must be invoked with the po->bind_lock held,
 * or from a context in which asynchronous accesses to the packet
 * socket is not possible (packet_create()).
 *
 * Does nothing if the hook is already running.  Otherwise links the
 * socket into its fanout group (when po->fanout is set) or adds
 * po->prot_hook with dev_add_pack(), takes a socket reference and
 * marks the hook as running.
 */
static void register_prot_hook(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);

	if (po->running)
		return;

	if (po->fanout)
		__fanout_link(sk, po);
	else
		dev_add_pack(&po->prot_hook);

	sock_hold(sk);
	po->running = 1;
}

Contributors

PersonTokensPropCommitsCommitProp
david s. millerdavid s. miller63100.00%2100.00%
Total63100.00%2100.00%

/*
 * {,__}unregister_prot_hook() must be invoked with the po->bind_lock
 * held.  If the sync parameter is true, we will temporarily drop
 * the po->bind_lock and do a synchronize_net to make sure no
 * asynchronous packet processing paths still refer to the elements
 * of po->prot_hook.  If the sync parameter is false, it is the
 * callers responsibility to take care of this.
 */
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	po->running = 0;

	if (po->fanout)
		__fanout_unlink(sk, po);
	else
		__dev_remove_pack(&po->prot_hook);

	__sock_put(sk);

	if (!sync)
		return;

	/* Drop the lock around synchronize_net(), then take it back. */
	spin_unlock(&po->bind_lock);
	synchronize_net();
	spin_lock(&po->bind_lock);
}

Contributors

PersonTokensPropCommitsCommitProp
david s. millerdavid s. miller82100.00%2100.00%
Total82100.00%2100.00%


/*
 * Unregister the protocol hook only if it is currently running;
 * @sync is forwarded to __unregister_prot_hook().
 */
static void unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	if (!po->running)
		return;

	__unregister_prot_hook(sk, sync);
}

Contributors

PersonTokensPropCommitsCommitProp
david s. millerdavid s. miller37100.00%1100.00%
Total37100.00%1100.00%


/*
 * Translate a ring-buffer address to its struct page, using
 * vmalloc_to_page() for vmalloc addresses and virt_to_page() otherwise.
 */
static inline struct page * __pure pgv_to_page(void *addr)
{
	struct page *pg;

	if (is_vmalloc_addr(addr))
		pg = vmalloc_to_page(addr);
	else
		pg = virt_to_page(addr);

	return pg;
}

Contributors

PersonTokensPropCommitsCommitProp
changli gaochangli gao2472.73%150.00%
michael s. tsirkinmichael s. tsirkin927.27%150.00%
Total33100.00%2100.00%


static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union tpacket_uhdr h; h.raw = frame; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_status = status; flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: h.h2->tp_status = status; flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; case TPACKET_V3: default: WARN(1, "TPACKET version not supported.\n"); BUG(); } smp_wmb(); }

Contributors

PersonTokensPropCommitsCommitProp
johann baudyjohann baudy4341.75%111.11%
patrick mchardypatrick mchardy3836.89%111.11%
chetan lokechetan loke76.80%111.11%
ulisses alonso camaroulisses alonso camaro65.83%111.11%
daniel borkmanndaniel borkmann32.91%111.11%
pre-gitpre-git21.94%111.11%
changli gaochangli gao21.94%111.11%
michael s. tsirkinmichael s. tsirkin10.97%111.11%
arnaldo carvalho de meloarnaldo carvalho de melo10.97%111.11%
Total103100.00%9100.00%


static int __packet_get_status(struct packet_sock *po, void *frame) { union tpacket_uhdr h; smp_rmb(); h.raw = frame; switch (po->tp_version) { case TPACKET_V1: flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return h.h1->tp_status; case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; case TPACKET_V3: default: WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } }

Contributors

PersonTokensPropCommitsCommitProp
patrick mchardypatrick mchardy4040.40%120.00%
johann baudyjohann baudy3535.35%120.00%
chetan lokechetan loke1919.19%120.00%
daniel borkmanndaniel borkmann33.03%120.00%
changli gaochangli gao22.02%120.00%
Total99100.00%5100.00%


/*
 * Fill @ts with a timestamp for @skb and report where it came from.
 *
 * The hardware timestamp is tried first, but only when @flags contains
 * SOF_TIMESTAMPING_RAW_HARDWARE; skb->tstamp is tried next.  Each
 * attempt goes through ktime_to_timespec_cond().
 *
 * Returns TP_STATUS_TS_RAW_HARDWARE, TP_STATUS_TS_SOFTWARE, or 0 when
 * neither conversion succeeded.
 */
static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
				   unsigned int flags)
{
	struct skb_shared_hwtstamps *hw = skb_hwtstamps(skb);

	if (hw && (flags & SOF_TIMESTAMPING_RAW_HARDWARE)) {
		if (ktime_to_timespec_cond(hw->hwtstamp, ts))
			return TP_STATUS_TS_RAW_HARDWARE;
	}

	if (ktime_to_timespec_cond(skb->tstamp, ts))
		return TP_STATUS_TS_SOFTWARE;

	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann69100.00%2100.00%
Total69100.00%2100.00%


static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct sk_buff *skb) { union tpacket_uhdr h; struct timespec ts; __u32 ts_status; if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) return 0; h.raw = frame; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; break; case TPACKET_V2: h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; break; case TPACKET_V3: default: WARN(1, "TPACKET version not supported.\n"); BUG(); } /* one flush is safe, as both fields always lie on the same cacheline */ flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); smp_wmb(); return ts_status; }

Contributors

PersonTokensPropCommitsCommitProp
willem de bruijnwillem de bruijn12985.43%133.33%
daniel borkmanndaniel borkmann2214.57%266.67%
Total151100.00%3100.00%


/*
 * Locate frame number @position in ring @rb and return it only if its
 * current status equals @status; otherwise return NULL.
 *
 * The frame lives in pg_vec[position / frames_per_block], at an offset
 * of (position % frames_per_block) * frame_size bytes.
 */
static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int blk_idx = position / rb->frames_per_block;
	unsigned int idx_in_blk = position % rb->frames_per_block;
	void *frame;

	frame = rb->pg_vec[blk_idx].buffer + (idx_in_blk * rb->frame_size);

	if (__packet_get_status(po, frame) != status)
		return NULL;

	return frame;
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke8996.74%150.00%
daniel borkmanndaniel borkmann33.26%150.00%
Total92100.00%2100.00%


/*
 * Look up the frame at rb->head; returns it if its status equals
 * @status, NULL otherwise (see packet_lookup_frame()).
 */
static void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int head = rb->head;

	return packet_lookup_frame(po, rb, head, status);
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke34100.00%1100.00%
Total34100.00%1100.00%


/* Delete the block-retire timer with del_timer_sync(). */
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	struct timer_list *timer = &pkc->retire_blk_timer;

	del_timer_sync(timer);
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke19100.00%2100.00%
Total19100.00%2100.00%


/*
 * Stop the RX ring's block-retire timer: set delete_blk_timer under
 * @rb_queue's lock (the timer handler checks this flag), then delete
 * the timer synchronously.
 */
static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
		struct sk_buff_head *rb_queue)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);

	spin_lock_bh(&rb_queue->lock);
	pkc->delete_blk_timer = 1;
	spin_unlock_bh(&rb_queue->lock);

	prb_del_retire_blk_timer(pkc);
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke5391.38%250.00%
duan jiongduan jiong35.17%125.00%
veaceslav falicoveaceslav falico23.45%125.00%
Total58100.00%4100.00%


static void prb_init_blk_timer(struct packet_sock *po, struct tpacket_kbdq_core *pkc, void (*func) (unsigned long)) { init_timer(&pkc->retire_blk_timer); pkc->retire_blk_timer.data = (long)po; pkc->retire_blk_timer.function = func; pkc->retire_blk_timer.expires = jiffies; }

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke61100.00%2100.00%
Total61100.00%2100.00%


/*
 * Initialise the RX ring's block-retire timer with
 * prb_retire_rx_blk_timer_expired() as its handler.
 */
static void prb_setup_retire_blk_timer(struct packet_sock *po)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);

	prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke3291.43%266.67%
duan jiongduan jiong38.57%133.33%
Total35100.00%3100.00%


static int prb_calc_retire_blk_tmo(struct packet_sock *po, int blk_size_in_bytes) { struct net_device *dev; unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; struct ethtool_link_ksettings ecmd; int err; rtnl_lock(); dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); if (unlikely(!dev)) { rtnl_unlock(); return DEFAULT_PRB_RETIRE_TOV; } err = __ethtool_get_link_ksettings(dev, &ecmd); rtnl_unlock(); if (!err) { /* * If the link speed is so slow you don't really * need to worry about perf anyways */ if (ecmd.base.speed < SPEED_1000 || ecmd.base.speed == SPEED_UNKNOWN) { return DEFAULT_PRB_RETIRE_TOV; } else { msec = 1; div = ecmd.base.speed / 1000; } } mbits = (blk_size_in_bytes * 8) / (1024 * 1024); if (div) mbits /= div; tmo = mbits * msec; if (div) return tmo+1; return tmo; }

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke12770.17%125.00%
jiri pirkojiri pirko2513.81%125.00%
parav panditparav pandit158.29%125.00%
david decotignydavid decotigny147.73%125.00%
Total181100.00%4100.00%


/* Copy the requested feature word from @req_u->req3 into @p1. */
static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
			union tpacket_req_u *req_u)
{
	p1->feature_req_word = req_u->req3.tp_feature_req_word;
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke26100.00%2100.00%
Total26100.00%2100.00%


static void init_prb_bdqc(struct packet_sock *po, struct packet_ring_buffer *rb, struct pgv *pg_vec, union tpacket_req_u *req_u) { struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); p1->knxt_seq_num = 1; p1->pkbdq = pg_vec; pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; p1->pkblk_start = pg_vec[0].buffer; p1->kblk_size = req_u->req3.tp_block_size; p1->knum_blocks = req_u->req3.tp_block_nr; p1->hdrlen = po->tp_hdrlen; p1->version = po->tp_version; p1->last_kactive_blk_num = 0; po->stats.stats3.tp_freeze_q_cnt = 0; if (req_u->req3.tp_retire_blk_tov) p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; else p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, req_u->req3.tp_block_size); p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po); prb_open_block(p1, pbd); }

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke21391.81%240.00%
eric dumazeteric dumazet156.47%120.00%
duan jiongduan jiong31.29%120.00%
daniel borkmanndaniel borkmann10.43%120.00%
Total232100.00%5100.00%

/*
 * Re-arm the retire timer tov_in_jiffies from now, then remember the
 * currently active block number.
 *
 * Do NOT update the last_blk_num first.
 * Assumes sk_buff_head lock is held.
 */
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	unsigned long expiry = jiffies + pkc->tov_in_jiffies;

	mod_timer(&pkc->retire_blk_timer, expiry);
	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke33100.00%2100.00%
Total33100.00%2100.00%

/* * Timer logic: * 1) We refresh the timer only when we open a block. * By doing this we don't waste cycles refreshing the timer * on packet-by-packet basis. * * With a 1MB block-size, on a 1Gbps line, it will take * i) ~8 ms to fill a block + ii) memcpy etc. * In this cut we are not accounting for the memcpy time. * * So, if the user sets the 'tmo' to 10ms then the timer * will never fire while the block is still getting filled * (which is what we want). However, the user could choose * to close a block early and that's fine. * * But when the timer does fire, we check whether or not to refresh it. * Since the tmo granularity is in msecs, it is not too expensive * to refresh the timer, lets say every '8' msecs. * Either the user can set the 'tmo' or we can derive it based on * a) line-speed and b) block-size. * prb_calc_retire_blk_tmo() calculates the tmo. * */
static void prb_retire_rx_blk_timer_expired(unsigned long data) { struct packet_sock *po = (struct packet_sock *)data; struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; spin_lock(&po->sk.sk_receive_queue.lock); frozen = prb_queue_frozen(pkc); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); if (unlikely(pkc->delete_blk_timer)) goto out; /* We only need to plug the race when the block is partially filled. * tpacket_rcv: * lock(); increment BLOCK_NUM_PKTS; unlock() * copy_bits() is in progress ... * timer fires on other cpu: * we can't retire the current block because copy_bits * is in progress. * */ if (BLOCK_NUM_PKTS(pbd)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... */ cpu_relax(); } } if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { if (!BLOCK_NUM_PKTS(pbd)) { /* An empty block. Just refresh the timer. */ goto refresh_timer; } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; else goto out; } else { /* Case 1. Queue was frozen because user-space was * lagging behind. */ if (prb_curr_blk_in_use(pkc, pbd)) { /* * Ok, user-space is still behind. * So just refresh the timer. */ goto refresh_timer; } else { /* Case 2. queue was frozen,user-space caught up, * now the link went idle && the timer fired. * We don't have a block to close.So we open this * block and restart the timer. * opening a block thaws the queue,restarts timer * Thawing/timer-refresh is a side effect. */ prb_open_block(pkc, pbd); goto out; } } } refresh_timer: _prb_refresh_rx_retire_blk_timer(pkc); out: spin_unlock(&po->sk.sk_receive_queue.lock); }

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke20492.31%250.00%
alexander drozdovalexander drozdov146.33%125.00%
duan jiongduan jiong31.36%125.00%
Total221100.00%4100.00%


/*
 * Publish @status as the block status of @pbd1.
 *
 * On architectures where ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is 1, the
 * pages after the first one (up to the page-aligned pkblk_end) are
 * flushed before the status is written, and the first page -- which
 * holds the block header -- is flushed afterwards; each flush pass is
 * followed by smp_wmb().  On other architectures only the status store
 * is performed.
 */
static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1, __u32 status)
{
	/* Flush everything minus the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	/* Skip the block header(we know header WILL fit in 4K) */
	u8 *cursor = (u8 *)pbd1 + PAGE_SIZE;
	u8 *limit = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);

	while (cursor < limit) {
		flush_dcache_page(pgv_to_page(cursor));
		cursor += PAGE_SIZE;
	}

	smp_wmb();
#endif

	/* Now update the block status. */

	BLOCK_STATUS(pbd1) = status;

	/* Flush the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	cursor = (u8 *)pbd1;
	flush_dcache_page(pgv_to_page(cursor));

	smp_wmb();
#endif
}

Contributors

PersonTokensPropCommitsCommitProp
chetan lokechetan loke121100.00%2100.00%
Total121100.00%2100.00%

/* * Side effect: * * 1) flush the block * 2) Increment active_blk_num * * Note:We DONT refresh the timer on purpose. * Because almost always the next block will be opened. */
static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, struct packet_sock *po, unsigned int stat) { __u32 status = TP_STATUS_USER | stat; struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; struct sock *sk = &po->sk; if (po->stats.stats3.tp_drops) status |= TP_STATUS_LOSING; last_pkt = (struct tpacket3_hdr *)pkc1->prev; last_pkt->tp_next_offset = 0; /* Get the ts of the last pkt */ if (BLOCK_NUM_PKTS(pbd1)) { <