Release 4.11 net/packet/af_packet.c
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* PACKET - implements raw packet sockets.
*
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox, <gw4pts@gw4pts.ampr.org>
*
* Fixes:
* Alan Cox : verify_area() now used correctly
* Alan Cox : new skbuff lists, look ma no backlogs!
* Alan Cox : tidied skbuff lists.
* Alan Cox : Now uses generic datagram routines I
* added. Also fixed the peek/read crash
* from all old Linux datagram code.
* Alan Cox : Uses the improved datagram code.
* Alan Cox : Added NULL's for socket options.
* Alan Cox : Re-commented the code.
* Alan Cox : Use new kernel side addressing
* Rob Janssen : Correct MTU usage.
* Dave Platt : Counter leaks caused by incorrect
* interrupt locking and some slightly
* dubious gcc output. Can you read
* compiler: it said _VOLATILE_
* Richard Kooijman : Timestamp fixes.
* Alan Cox : New buffers. Use sk->mac.raw.
* Alan Cox : sendmsg/recvmsg support.
* Alan Cox : Protocol setting support
* Alexey Kuznetsov : Untied from IPv4 stack.
* Cyrus Durgin : Fixed kerneld for kmod.
* Michal Ostrowski : Module initialization cleanup.
* Ulises Alonso : Frame number limit removal and
* packet_set_ring memory leak.
* Eric Biederman : Allow for > 8 byte hardware addresses.
* The convention is that longer addresses
* will simply extend the hardware address
* byte arrays at the end of sockaddr_ll
* and packet_mreq.
* Johann Baudy : Added TX RING.
* Chetan Loke : Implemented TPACKET_V3 block abstraction
* layer.
* Copyright (C) 2011, <lokec@ccs.neu.edu>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/percpu.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <linux/bpf.h>
#include <net/compat.h>
#include "internal.h"
/*
Assumptions:
- if device has no dev->hard_header routine, it adds and removes ll header
inside itself. In this case ll header is invisible outside of device,
but higher levels still should reserve dev->hard_header_len.
Some devices are enough clever to reallocate skb, when header
will not fit to reserved space (tunnel), another ones are silly
(PPP).
- packet socket receives packets with pulled ll header,
so that SOCK_RAW should push it back.
On receive:
-----------
Incoming, dev->hard_header!=NULL
mac_header -> ll header
data -> data
Outgoing, dev->hard_header!=NULL
mac_header -> ll header
data -> ll header
Incoming, dev->hard_header==NULL
mac_header -> UNKNOWN position. It is very likely, that it points to ll
header. PPP makes it, that is wrong, because introduce
assymetry between rx and tx paths.
data -> data
Outgoing, dev->hard_header==NULL
mac_header -> data. ll header is still not built!
data -> data
Resume
If dev->hard_header==NULL we are unlikely to restore sensible ll header.
On transmit:
------------
dev->hard_header != NULL
mac_header -> ll header
data -> ll header
dev->hard_header == NULL (ll header is added by device, we cannot control it)
mac_header -> data
data -> data
We should set nh.raw on output to correct posistion,
packet classifier depends on it.
*/
/* Private packet socket structures. */
/* identical to struct packet_mreq except it has
* a longer address field.
*/
struct packet_mreq_max {
int mr_ifindex;
unsigned short mr_type;
unsigned short mr_alen;
unsigned char mr_address[MAX_ADDR_LEN];
};
union tpacket_uhdr {
struct tpacket_hdr *h1;
struct tpacket2_hdr *h2;
struct tpacket3_hdr *h3;
void *raw;
};
static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
int closing, int tx_ring);
#define V3_ALIGNMENT (8)
#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
#define BLK_PLUS_PRIV(sz_of_priv) \
(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
#define PGV_FROM_VMALLOC 1
#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev);
static void *packet_previous_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(unsigned long);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_init_blk_timer(struct packet_sock *,
struct tpacket_kbdq_core *,
void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
struct packet_skb_cb {
union {
struct sockaddr_pkt pkt;
union {
/* Trick: alias skb original length with
* ll.sll_family and ll.protocol in order
* to save room.
*/
unsigned int origlen;
struct sockaddr_ll ll;
};
}
sa;
};
#define vio_le() virtio_legacy_is_little_endian()
#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
#define GET_PBLOCK_DESC(x, bid) \
((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
#define GET_NEXT_PRB_BLK_NUM(x) \
(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
((x)->kactive_blk_num+1) : 0)
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
static int packet_direct_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct sk_buff *orig_skb = skb;
struct netdev_queue *txq;
int ret = NETDEV_TX_BUSY;
if (unlikely(!netif_running(dev) ||
!netif_carrier_ok(dev)))
goto drop;
skb = validate_xmit_skb_list(skb, dev);
if (skb != orig_skb)
goto drop;
txq = skb_get_tx_queue(dev, skb);
local_bh_disable();
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq))
ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
local_bh_enable();
if (!dev_xmit_complete(ret))
kfree_skb(skb);
return ret;
drop:
atomic_long_inc(&dev->tx_dropped);
kfree_skb_list(skb);
return NET_XMIT_DROP;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 145 | 89.51% | 5 | 55.56% |
Willem de Bruijn | 13 | 8.02% | 1 | 11.11% |
David S. Miller | 4 | 2.47% | 3 | 33.33% |
Total | 162 | 100.00% | 9 | 100.00% |
static struct net_device *packet_cached_dev_get(struct packet_sock *po)
{
struct net_device *dev;
rcu_read_lock();
dev = rcu_dereference(po->cached_dev);
if (likely(dev))
dev_hold(dev);
rcu_read_unlock();
return dev;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 48 | 100.00% | 1 | 100.00% |
Total | 48 | 100.00% | 1 | 100.00% |
static void packet_cached_dev_assign(struct packet_sock *po,
struct net_device *dev)
{
rcu_assign_pointer(po->cached_dev, dev);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 25 | 100.00% | 1 | 100.00% |
Total | 25 | 100.00% | 1 | 100.00% |
static void packet_cached_dev_reset(struct packet_sock *po)
{
RCU_INIT_POINTER(po->cached_dev, NULL);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 20 | 100.00% | 1 | 100.00% |
Total | 20 | 100.00% | 1 | 100.00% |
static bool packet_use_direct_xmit(const struct packet_sock *po)
{
return po->xmit == packet_direct_xmit;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 19 | 100.00% | 1 | 100.00% |
Total | 19 | 100.00% | 1 | 100.00% |
static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 26 | 96.30% | 2 | 66.67% |
Li Zhong | 1 | 3.70% | 1 | 33.33% |
Total | 27 | 100.00% | 3 | 100.00% |
static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
const struct net_device_ops *ops = dev->netdev_ops;
u16 queue_index;
if (ops->ndo_select_queue) {
queue_index = ops->ndo_select_queue(dev, skb, NULL,
__packet_pick_tx_queue);
queue_index = netdev_cap_txqueue(dev, queue_index);
} else {
queue_index = __packet_pick_tx_queue(dev, skb);
}
skb_set_queue_mapping(skb, queue_index);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 80 | 100.00% | 1 | 100.00% |
Total | 80 | 100.00% | 1 | 100.00% |
/* register_prot_hook must be invoked with the po->bind_lock held,
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
*/
static void register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
if (!po->running) {
if (po->fanout)
__fanout_link(sk, po);
else
dev_add_pack(&po->prot_hook);
sock_hold(sk);
po->running = 1;
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
David S. Miller | 63 | 100.00% | 2 | 100.00% |
Total | 63 | 100.00% | 2 | 100.00% |
/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
* held. If the sync parameter is true, we will temporarily drop
* the po->bind_lock and do a synchronize_net to make sure no
* asynchronous packet processing paths still refer to the elements
* of po->prot_hook. If the sync parameter is false, it is the
* callers responsibility to take care of this.
*/
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
struct packet_sock *po = pkt_sk(sk);
po->running = 0;
if (po->fanout)
__fanout_unlink(sk, po);
else
__dev_remove_pack(&po->prot_hook);
__sock_put(sk);
if (sync) {
spin_unlock(&po->bind_lock);
synchronize_net();
spin_lock(&po->bind_lock);
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
David S. Miller | 82 | 100.00% | 2 | 100.00% |
Total | 82 | 100.00% | 2 | 100.00% |
static void unregister_prot_hook(struct sock *sk, bool sync)
{
struct packet_sock *po = pkt_sk(sk);
if (po->running)
__unregister_prot_hook(sk, sync);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
David S. Miller | 37 | 100.00% | 1 | 100.00% |
Total | 37 | 100.00% | 1 | 100.00% |
static inline struct page * __pure pgv_to_page(void *addr)
{
if (is_vmalloc_addr(addr))
return vmalloc_to_page(addr);
return virt_to_page(addr);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Changli Gao | 24 | 72.73% | 1 | 50.00% |
Michael S. Tsirkin | 9 | 27.27% | 1 | 50.00% |
Total | 33 | 100.00% | 2 | 100.00% |
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
union tpacket_uhdr h;
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
h.h1->tp_status = status;
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
h.h2->tp_status = status;
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
case TPACKET_V3:
h.h3->tp_status = status;
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
}
smp_wmb();
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Johann Baudy | 43 | 34.40% | 1 | 10.00% |
Patrick McHardy | 38 | 30.40% | 1 | 10.00% |
Sowmini Varadhan | 22 | 17.60% | 1 | 10.00% |
Chetan Loke | 7 | 5.60% | 1 | 10.00% |
Ulisses Alonso Camaró | 6 | 4.80% | 1 | 10.00% |
Daniel Borkmann | 3 | 2.40% | 1 | 10.00% |
Changli Gao | 2 | 1.60% | 1 | 10.00% |
Linus Torvalds (pre-git) | 2 | 1.60% | 1 | 10.00% |
Arnaldo Carvalho de Melo | 1 | 0.80% | 1 | 10.00% |
Michael S. Tsirkin | 1 | 0.80% | 1 | 10.00% |
Total | 125 | 100.00% | 10 | 100.00% |
static int __packet_get_status(struct packet_sock *po, void *frame)
{
union tpacket_uhdr h;
smp_rmb();
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
return h.h1->tp_status;
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
return h.h2->tp_status;
case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
return h.h3->tp_status;
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
return 0;
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Patrick McHardy | 40 | 33.61% | 1 | 16.67% |
Johann Baudy | 35 | 29.41% | 1 | 16.67% |
Sowmini Varadhan | 20 | 16.81% | 1 | 16.67% |
Chetan Loke | 19 | 15.97% | 1 | 16.67% |
Daniel Borkmann | 3 | 2.52% | 1 | 16.67% |
Changli Gao | 2 | 1.68% | 1 | 16.67% |
Total | 119 | 100.00% | 6 | 100.00% |
static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
unsigned int flags)
{
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
if (shhwtstamps &&
(flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
return TP_STATUS_TS_RAW_HARDWARE;
if (ktime_to_timespec_cond(skb->tstamp, ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 69 | 100.00% | 2 | 100.00% |
Total | 69 | 100.00% | 2 | 100.00% |
static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
struct sk_buff *skb)
{
union tpacket_uhdr h;
struct timespec ts;
__u32 ts_status;
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
return 0;
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
h.h1->tp_sec = ts.tv_sec;
h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
break;
case TPACKET_V2:
h.h2->tp_sec = ts.tv_sec;
h.h2->tp_nsec = ts.tv_nsec;
break;
case TPACKET_V3:
h.h3->tp_sec = ts.tv_sec;
h.h3->tp_nsec = ts.tv_nsec;
break;
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
}
/* one flush is safe, as both fields always lie on the same cacheline */
flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
smp_wmb();
return ts_status;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Willem de Bruijn | 129 | 75.00% | 1 | 25.00% |
Daniel Borkmann | 43 | 25.00% | 3 | 75.00% |
Total | 172 | 100.00% | 4 | 100.00% |
static void *packet_lookup_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
unsigned int position,
int status)
{
unsigned int pg_vec_pos, frame_offset;
union tpacket_uhdr h;
pg_vec_pos = position / rb->frames_per_block;
frame_offset = position % rb->frames_per_block;
h.raw = rb->pg_vec[pg_vec_pos].buffer +
(frame_offset * rb->frame_size);
if (status != __packet_get_status(po, h.raw))
return NULL;
return h.raw;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 89 | 96.74% | 1 | 50.00% |
Daniel Borkmann | 3 | 3.26% | 1 | 50.00% |
Total | 92 | 100.00% | 2 | 100.00% |
static void *packet_current_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
int status)
{
return packet_lookup_frame(po, rb, rb->head, status);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 34 | 100.00% | 1 | 100.00% |
Total | 34 | 100.00% | 1 | 100.00% |
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
del_timer_sync(&pkc->retire_blk_timer);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 19 | 100.00% | 2 | 100.00% |
Total | 19 | 100.00% | 2 | 100.00% |
static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
struct sk_buff_head *rb_queue)
{
struct tpacket_kbdq_core *pkc;
pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
spin_lock_bh(&rb_queue->lock);
pkc->delete_blk_timer = 1;
spin_unlock_bh(&rb_queue->lock);
prb_del_retire_blk_timer(pkc);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 53 | 91.38% | 2 | 50.00% |
Duan Jiong | 3 | 5.17% | 1 | 25.00% |
Veaceslav Falico | 2 | 3.45% | 1 | 25.00% |
Total | 58 | 100.00% | 4 | 100.00% |
static void prb_init_blk_timer(struct packet_sock *po,
struct tpacket_kbdq_core *pkc,
void (*func) (unsigned long))
{
init_timer(&pkc->retire_blk_timer);
pkc->retire_blk_timer.data = (long)po;
pkc->retire_blk_timer.function = func;
pkc->retire_blk_timer.expires = jiffies;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 61 | 100.00% | 2 | 100.00% |
Total | 61 | 100.00% | 2 | 100.00% |
static void prb_setup_retire_blk_timer(struct packet_sock *po)
{
struct tpacket_kbdq_core *pkc;
pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 32 | 91.43% | 2 | 66.67% |
Duan Jiong | 3 | 8.57% | 1 | 33.33% |
Total | 35 | 100.00% | 3 | 100.00% |
static int prb_calc_retire_blk_tmo(struct packet_sock *po,
int blk_size_in_bytes)
{
struct net_device *dev;
unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
struct ethtool_link_ksettings ecmd;
int err;
rtnl_lock();
dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
if (unlikely(!dev)) {
rtnl_unlock();
return DEFAULT_PRB_RETIRE_TOV;
}
err = __ethtool_get_link_ksettings(dev, &ecmd);
rtnl_unlock();
if (!err) {
/*
* If the link speed is so slow you don't really
* need to worry about perf anyways
*/
if (ecmd.base.speed < SPEED_1000 ||
ecmd.base.speed == SPEED_UNKNOWN) {
return DEFAULT_PRB_RETIRE_TOV;
} else {
msec = 1;
div = ecmd.base.speed / 1000;
}
}
mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
if (div)
mbits /= div;
tmo = mbits * msec;
if (div)
return tmo+1;
return tmo;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 127 | 70.17% | 1 | 25.00% |
Jiri Pirko | 25 | 13.81% | 1 | 25.00% |
Parav Pandit | 15 | 8.29% | 1 | 25.00% |
David Decotigny | 14 | 7.73% | 1 | 25.00% |
Total | 181 | 100.00% | 4 | 100.00% |
static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
union tpacket_req_u *req_u)
{
p1->feature_req_word = req_u->req3.tp_feature_req_word;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 26 | 100.00% | 2 | 100.00% |
Total | 26 | 100.00% | 2 | 100.00% |
static void init_prb_bdqc(struct packet_sock *po,
struct packet_ring_buffer *rb,
struct pgv *pg_vec,
union tpacket_req_u *req_u)
{
struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
struct tpacket_block_desc *pbd;
memset(p1, 0x0, sizeof(*p1));
p1->knxt_seq_num = 1;
p1->pkbdq = pg_vec;
pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
p1->pkblk_start = pg_vec[0].buffer;
p1->kblk_size = req_u->req3.tp_block_size;
p1->knum_blocks = req_u->req3.tp_block_nr;
p1->hdrlen = po->tp_hdrlen;
p1->version = po->tp_version;
p1->last_kactive_blk_num = 0;
po->stats.stats3.tp_freeze_q_cnt = 0;
if (req_u->req3.tp_retire_blk_tov)
p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
else
p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
req_u->req3.tp_block_size);
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
prb_setup_retire_blk_timer(po);
prb_open_block(p1, pbd);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 213 | 91.81% | 2 | 40.00% |
Eric Dumazet | 15 | 6.47% | 1 | 20.00% |
Duan Jiong | 3 | 1.29% | 1 | 20.00% |
Daniel Borkmann | 1 | 0.43% | 1 | 20.00% |
Total | 232 | 100.00% | 5 | 100.00% |
/* Do NOT update the last_blk_num first.
* Assumes sk_buff_head lock is held.
*/
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
mod_timer(&pkc->retire_blk_timer,
jiffies + pkc->tov_in_jiffies);
pkc->last_kactive_blk_num = pkc->kactive_blk_num;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 33 | 100.00% | 2 | 100.00% |
Total | 33 | 100.00% | 2 | 100.00% |
/*
* Timer logic:
* 1) We refresh the timer only when we open a block.
* By doing this we don't waste cycles refreshing the timer
* on packet-by-packet basis.
*
* With a 1MB block-size, on a 1Gbps line, it will take
* i) ~8 ms to fill a block + ii) memcpy etc.
* In this cut we are not accounting for the memcpy time.
*
* So, if the user sets the 'tmo' to 10ms then the timer
* will never fire while the block is still getting filled
* (which is what we want). However, the user could choose
* to close a block early and that's fine.
*
* But when the timer does fire, we check whether or not to refresh it.
* Since the tmo granularity is in msecs, it is not too expensive
* to refresh the timer, lets say every '8' msecs.
* Either the user can set the 'tmo' or we can derive it based on
* a) line-speed and b) block-size.
* prb_calc_retire_blk_tmo() calculates the tmo.
*
*/
static void prb_retire_rx_blk_timer_expired(unsigned long data)
{
struct packet_sock *po = (struct packet_sock *)data;
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
unsigned int frozen;
struct tpacket_block_desc *pbd;
spin_lock(&po->sk.sk_receive_queue.lock);
frozen = prb_queue_frozen(pkc);
pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
if (unlikely(pkc->delete_blk_timer))
goto out;
/* We only need to plug the race when the block is partially filled.
* tpacket_rcv:
* lock(); increment BLOCK_NUM_PKTS; unlock()
* copy_bits() is in progress ...
* timer fires on other cpu:
* we can't retire the current block because copy_bits
* is in progress.
*
*/
if (BLOCK_NUM_PKTS(pbd)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
cpu_relax();
}
}
if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
if (!frozen) {
if (!BLOCK_NUM_PKTS(pbd)) {
/* An empty block. Just refresh the timer. */
goto refresh_timer;
}
prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
if (!prb_dispatch_next_block(pkc, po))
goto refresh_timer;
else
goto out;
} else {
/* Case 1. Queue was frozen because user-space was
* lagging behind.
*/
if (prb_curr_blk_in_use(pkc, pbd)) {
/*
* Ok, user-space is still behind.
* So just refresh the timer.
*/
goto refresh_timer;
} else {
/* Case 2. queue was frozen,user-space caught up,
* now the link went idle && the timer fired.
* We don't have a block to close.So we open this
* block and restart the timer.
* opening a block thaws the queue,restarts timer
* Thawing/timer-refresh is a side effect.
*/
prb_open_block(pkc, pbd);
goto out;
}
}
}
refresh_timer:
_prb_refresh_rx_retire_blk_timer(pkc);
out:
spin_unlock(&po->sk.sk_receive_queue.lock);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 204 | 92.31% | 2 | 50.00% |
Alexander Drozdov | 14 | 6.33% | 1 | 25.00% |
Duan Jiong | 3 | 1.36% | 1 | 25.00% |
Total | 221 | 100.00% | 4 | 100.00% |
static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
struct tpacket_block_desc *pbd1, __u32 status)
{
/* Flush everything minus the block header */
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
u8 *start, *end;
start = (u8 *)pbd1;
/* Skip the block header(we know header WILL fit in 4K) */
start += PAGE_SIZE;
end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
for (; start < end; start += PAGE_SIZE)
flush_dcache_page(pgv_to_page(start));
smp_wmb();
#endif
/* Now update the block status. */
BLOCK_STATUS(pbd1) = status;
/* Flush the block header */
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
start = (u8 *)pbd1;
flush_dcache_page(pgv_to_page(start));
smp_wmb();
#endif
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Chetan Loke | 121 | 100.00% | 2 | 100.00% |
Total | 121 | 100.00% | 2 | 100.00% |
/*
* Side effect:
*
* 1) flush the block
* 2) Increment active_blk_num
*
* Note:We DONT refresh the timer on purpose.
* Because almost always the next block will be opened.
*/
static void prb_close_block(struct tpacket_kbdq_core *pkc1,
struct tpacket_block_desc *pbd1,
struct packet_sock *po, unsigned int stat)
{
__u32 status = TP_STATUS_USER | stat;
struct tpacket3_hdr *last_pkt;
struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
struct sock *sk = &po->sk;
if (po->stats.stats3.tp_drops)
status |= TP_STATUS_LOSING;
last_pkt = (struct tpacket3_hdr *)pkc1->prev;
last_pkt->tp_next_offset = 0;
/* Get the ts of the last pkt */
if (BLOCK_NUM_PKTS(pbd1)) {
h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
} else {
/* Ok, we tmo'd - so get the current time.
*
* It shouldn't really happen as we don't close empty
* blocks. See prb_retire_rx_blk_timer_expired().
*/
struct