cregit-Linux how code gets into the kernel

Release 4.8 net/core/netpoll.c

Directory: net/core
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */


#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */


/* Largest UDP payload, in bytes, accounted for when sizing pool skbs. */
#define MAX_UDP_CHUNK 1460

/* Upper bound on the number of skbs refill_skbs() keeps in skb_pool. */
#define MAX_SKBS 32


/* Reserve of preallocated skbs; find_skb() dequeues from it when alloc_skb() fails. */
static struct sk_buff_head skb_pool;


/* SRCU domain under which netpoll_poll_disable() reads dev->npinfo. */
DEFINE_STATIC_SRCU(netpoll_srcu);


/*
 * Busy-wait interval, in microseconds, between transmit attempts in
 * netpoll_send_skb_on_dev(); also used to derive its retry count.
 */
#define USEC_PER_POLL	50


/* Allocation size of each pool skb: Ethernet + IPv4 + UDP headers plus one payload chunk. */
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +                                        \
         sizeof(struct iphdr) +                                         \
         sizeof(struct udphdr) +                                        \
         MAX_UDP_CHUNK)

/* Forward declarations: both are referenced before their definitions. */
static void zap_completion_queue(void);
static void netpoll_async_cleanup(struct work_struct *work);


/*
 * Module parameter (mode 0644), default 4.
 * NOTE(review): its consumer is not in this part of the file; presumably a
 * carrier-wait timeout used during setup -- confirm units there.
 */
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);


/*
 * Logging helpers: wrap pr_info()/pr_err()/pr_notice() and prefix every
 * message with the netpoll instance name (np->name).
 */
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)

#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)

#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { int status = NETDEV_TX_OK; netdev_features_t features; features = netif_skb_features(skb); if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { /* This is actually a packet drop, but we * don't want the code that calls this * function to try and operate on a NULL skb. */ goto out; } } status = netdev_start_xmit(skb, dev, txq, false); out: return status; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
eric w. biederman | 88 | 93.62% | 1 | 16.67%
david s. miller | 4 | 4.26% | 3 | 50.00%
jiri pirko | 2 | 2.13% | 2 | 33.33%
Total | 94 | 100.00% | 6 | 100.00%


static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; unsigned long flags; while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { kfree_skb(skb); continue; } txq = skb_get_tx_queue(dev, skb); local_irq_save(flags); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); } }

Contributors

Person | Tokens | Prop | Commits | CommitProp
stephen hemminger | 54 | 30.00% | 2 | 15.38%
matt mackall | 52 | 28.89% | 1 | 7.69%
david s. miller | 23 | 12.78% | 2 | 15.38%
ingo molnar | 22 | 12.22% | 1 | 7.69%
david howells | 14 | 7.78% | 2 | 15.38%
eric w. biederman | 13 | 7.22% | 3 | 23.08%
tom herbert | 1 | 0.56% | 1 | 7.69%
daniel borkmann | 1 | 0.56% | 1 | 7.69%
Total | 180 | 100.00% | 13 | 100.00%

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 */
static void poll_one_napi(struct napi_struct *napi)
{
	int done;

	/*
	 * net_rx_action's ->poll() invocations and ours are synchronized
	 * by this test, which is only made while holding napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return;

	/*
	 * Finding NPSVC already set when we try to set it means that napi
	 * has been disabled, so this operation must be aborted.
	 */
	if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
		return;

	/*
	 * A budget of 0 is passed explicitly to tell the poll routine that
	 * only the Tx path is being cleared.
	 */
	done = napi->poll(napi, 0);
	WARN_ONCE(done, "%pF exceeded budget in poll\n", napi->poll);
	trace_napi_poll(napi, done, 0);

	clear_bit(NAPI_STATE_NPSVC, &napi->state);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
david s. miller | 38 | 43.68% | 1 | 14.29%
neil horman | 29 | 33.33% | 3 | 42.86%
eric w. biederman | 11 | 12.64% | 1 | 14.29%
alexander duyck | 5 | 5.75% | 1 | 14.29%
jesper dangaard brouer | 4 | 4.60% | 1 | 14.29%
Total | 87 | 100.00% | 7 | 100.00%


/*
 * Run poll_one_napi() on every NAPI context registered on @dev.
 *
 * A context is skipped when the current CPU already owns its poll (to
 * avoid recursing into ->poll()) or when its poll_lock cannot be taken
 * without waiting.
 */
static void poll_napi(struct net_device *dev)
{
	struct napi_struct *napi;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id())
			continue;
		if (!spin_trylock(&napi->poll_lock))
			continue;

		poll_one_napi(napi);
		spin_unlock(&napi->poll_lock);
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
matt mackall | 23 | 39.66% | 2 | 20.00%
stephen hemminger | 19 | 32.76% | 2 | 20.00%
andrew morton | 7 | 12.07% | 1 | 10.00%
jeff moyer | 6 | 10.34% | 2 | 20.00%
david s. miller | 2 | 3.45% | 2 | 20.00%
neil horman | 1 | 1.72% | 1 | 10.00%
Total | 58 | 100.00% | 10 | 100.00%


static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity * while changing device state */ if (down_trylock(&ni->dev_lock)) return; if (!netif_running(dev)) { up(&ni->dev_lock); return; } ops = dev->netdev_ops; if (!ops->ndo_poll_controller) { up(&ni->dev_lock); return; } /* Process pending work on NIC */ ops->ndo_poll_controller(dev); poll_napi(dev); up(&ni->dev_lock); zap_completion_queue(); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
neil horman | 40 | 37.04% | 4 | 30.77%
andrew morton | 24 | 22.22% | 1 | 7.69%
americo wang | 15 | 13.89% | 2 | 15.38%
stephen hemminger | 14 | 12.96% | 2 | 15.38%
pavel emelianov | 10 | 9.26% | 1 | 7.69%
david s. miller | 3 | 2.78% | 1 | 7.69%
joe perches | 1 | 0.93% | 1 | 7.69%
matt mackall | 1 | 0.93% | 1 | 7.69%
Total | 108 | 100.00% | 13 | 100.00%


/*
 * Block netpoll polling on @dev by taking the dev_lock semaphore of its
 * netpoll state, if the device has any.
 *
 * May sleep.  dev->npinfo is read under the netpoll_srcu read lock.
 */
void netpoll_poll_disable(struct net_device *dev)
{
	struct netpoll_info *npinfo;
	int srcu_idx;

	might_sleep();

	srcu_idx = srcu_read_lock(&netpoll_srcu);
	npinfo = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (npinfo)
		down(&npinfo->dev_lock);
	srcu_read_unlock(&netpoll_srcu, srcu_idx);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
neil horman | 59 | 96.72% | 2 | 50.00%
ding tianhong | 1 | 1.64% | 1 | 25.00%
eric w. biederman | 1 | 1.64% | 1 | 25.00%
Total | 61 | 100.00% | 4 | 100.00%

EXPORT_SYMBOL(netpoll_poll_disable);
/*
 * Counterpart of netpoll_poll_disable(): release the dev_lock semaphore
 * of @dev's netpoll state, if the device has any.
 *
 * dev->npinfo is read inside an RCU read-side critical section.
 */
void netpoll_poll_enable(struct net_device *dev)
{
	struct netpoll_info *npinfo;

	rcu_read_lock();
	npinfo = rcu_dereference(dev->npinfo);
	if (npinfo)
		up(&npinfo->dev_lock);
	rcu_read_unlock();
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
neil horman | 41 | 97.62% | 2 | 66.67%
eric w. biederman | 1 | 2.38% | 1 | 33.33%
Total | 42 | 100.00% | 3 | 100.00%

EXPORT_SYMBOL(netpoll_poll_enable);
/*
 * Top up skb_pool to MAX_SKBS entries of MAX_SKB_SIZE bytes each.
 *
 * Allocations use GFP_ATOMIC and are made with the pool lock held and
 * interrupts disabled; refilling stops at the first allocation failure.
 */
static void refill_skbs(void)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&skb_pool.lock, irq_flags);
	while (skb_pool.qlen < MAX_SKBS) {
		struct sk_buff *fresh = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);

		if (!fresh)
			break;

		__skb_queue_tail(&skb_pool, fresh);
	}
	spin_unlock_irqrestore(&skb_pool.lock, irq_flags);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
matt mackall | 55 | 78.57% | 1 | 50.00%
stephen hemminger | 15 | 21.43% | 1 | 50.00%
Total | 70 | 100.00% | 2 | 100.00%


/*
 * Release the skbs sitting on this CPU's softnet completion queue.
 *
 * The list is detached with local interrupts disabled and then walked
 * with interrupts restored.  skbs that skb_irq_freeable() accepts are
 * freed directly; the others get their reference re-taken and are handed
 * to dev_kfree_skb_any().
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending;
	unsigned long flags;

	if (!sd->completion_queue)
		goto out;

	local_irq_save(flags);
	pending = sd->completion_queue;
	sd->completion_queue = NULL;
	local_irq_restore(flags);

	while (pending != NULL) {
		struct sk_buff *skb = pending;

		pending = pending->next;

		if (skb_irq_freeable(skb)) {
			__kfree_skb(skb);
			continue;
		}

		atomic_inc(&skb->users);
		dev_kfree_skb_any(skb); /* put this one back */
	}

out:
	put_cpu_var(softnet_data);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
david s. miller | 112 | 96.55% | 1 | 50.00%
eric w. biederman | 4 | 3.45% | 1 | 50.00%
Total | 116 | 100.00% | 2 | 100.00%


static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { int count = 0; struct sk_buff *skb; zap_completion_queue(); refill_skbs(); repeat: skb = alloc_skb(len, GFP_ATOMIC); if (!skb) skb = skb_dequeue(&skb_pool); if (!skb) { if (++count < 10) { netpoll_poll_dev(np->dev); goto repeat; } return NULL; } atomic_set(&skb->users, 1); skb_reserve(skb, reserve); return skb; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
matt mackall | 91 | 84.26% | 1 | 25.00%
stephen hemminger | 11 | 10.19% | 1 | 25.00%
david s. miller | 3 | 2.78% | 1 | 25.00%
joe perches | 3 | 2.78% | 1 | 25.00%
Total | 108 | 100.00% | 4 | 100.00%


/*
 * Report whether the current CPU is the poll owner of any NAPI context
 * on @dev.  Returns 1 if so, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id()) {
			active = 1;
			break;
		}
	}

	return active;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
stephen hemminger | 41 | 100.00% | 1 | 100.00%
Total | 41 | 100.00% | 1 | 100.00%

/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev) { int status = NETDEV_TX_BUSY; unsigned long tries; /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; WARN_ON_ONCE(!irqs_disabled()); npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); return; } /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; txq = netdev_pick_tx(dev, skb, NULL); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (HARD_TX_TRYLOCK(dev, txq)) { if (!netif_xmit_stopped(txq)) status = netpoll_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); if (status == NETDEV_TX_OK) break; } /* tickle device maybe there is some cleanup */ netpoll_poll_dev(np->dev); udelay(USEC_PER_POLL); } WARN_ONCE(!irqs_disabled(), "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", dev->name, dev->netdev_ops->ndo_start_xmit); } if (status != NETDEV_TX_OK) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } }

Contributors

Person | Tokens | Prop | Commits | CommitProp
matt mackall | 74 | 30.83% | 7 | 23.33%
stephen hemminger | 71 | 29.58% | 4 | 13.33%
david s. miller | 18 | 7.50% | 2 | 6.67%
americo wang | 16 | 6.67% | 2 | 6.67%
dongdong deng | 15 | 6.25% | 1 | 3.33%
eric w. biederman | 12 | 5.00% | 3 | 10.00%
jeff moyer | 10 | 4.17% | 1 | 3.33%
neil horman | 6 | 2.50% | 1 | 3.33%
andrew morton | 3 | 1.25% | 1 | 3.33%
joe perches | 3 | 1.25% | 1 | 3.33%
eric dumazet | 3 | 1.25% | 1 | 3.33%
jason wang | 2 | 0.83% | 1 | 3.33%
david howells | 2 | 0.83% | 1 | 3.33%
jeremy fitzhardinge | 2 | 0.83% | 1 | 3.33%
tom herbert | 1 | 0.42% | 1 | 3.33%
herbert xu | 1 | 0.42% | 1 | 3.33%
peter p waskiewicz | 1 | 0.42% | 1 | 3.33%
Total | 240 | 100.00% | 30 | 100.00%

EXPORT_SYMBOL(netpoll_send_skb_on_dev);
/*
 * Build a UDP datagram carrying @len bytes of @msg and transmit it through
 * netpoll instance @np.
 *
 * The packet is assembled back to front inside one skb: payload, UDP
 * header, IPv6 or IPv4 header (selected by np->ipv6), then the Ethernet
 * header.  Addresses and ports come from @np; the source MAC is the
 * device address and the destination MAC is np->remote_mac.
 *
 * Warns once if called with interrupts enabled.  The message is silently
 * dropped when no skb can be obtained.
 */
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
	/* Counter supplying the IPv4 identification field of successive packets. */
	static atomic_t ip_ident;
	struct ipv6hdr *ip6h;

	WARN_ON_ONCE(!irqs_disabled());

	/* Lengths of the UDP datagram and of the IP packet that wraps it. */
	udp_len = len + sizeof(*udph);
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
		ip_len = udp_len + sizeof(*iph);

	total_len = ip_len + LL_RESERVED_SPACE(np->dev);

	/*
	 * Reserve everything except the payload as headroom so that each
	 * header can be prepended with skb_push() below.
	 */
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
	if (!skb)
		return;

	skb_copy_to_linear_data(skb, msg, len);
	skb_put(skb, len);

	/* UDP header; the checksum is filled in per address family below. */
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);

	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		/* A computed checksum of 0 is sent as CSUM_MANGLED_0 instead. */
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		/* A computed checksum of 0 is sent as CSUM_MANGLED_0 instead. */
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl = 64;
		iph->protocol = IPPROTO_UDP;
		/* The header checksum is computed over a zeroed check field. */
		iph->check = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
matt mackall | 323 | 47.64% | 4 | 20.00%
americo wang | 237 | 34.96% | 2 | 10.00%
chris lalancette | 41 | 6.05% | 1 | 5.00%
eric dumazet | 29 | 4.28% | 2 | 10.00%
arnaldo carvalho de melo | 28 | 4.13% | 5 | 25.00%
stephen hemminger | 9 | 1.33% | 2 | 10.00%
nikolay aleksandrov | 7 | 1.03% | 1 | 5.00%
joe perches | 2 | 0.29% | 1 | 5.00%
sven henkel | 1 | 0.15% | 1 | 5.00%
al viro | 1 | 0.15% | 1 | 5.00%
Total | 678 | 100.00% | 20 | 100.00%

EXPORT_SYMBOL(netpoll_send_udp);
/*
 * Log the configuration of netpoll instance @np at info level: local
 * port and address, interface name, remote port and address, and the
 * remote Ethernet address.  Addresses are printed as IPv6 or IPv4
 * according to np->ipv6.
 */
void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (np->ipv6) {
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
	} else {
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	}

	np_info(np, "interface '%s'\n", np->dev_name);

	np_info(np, "remote port %d\n", np->remote_port);
	if (np->ipv6) {
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
	} else {
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	}

	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
satyam sharma | 50 | 40.32% | 1 | 11.11%
americo wang | 48 | 38.71% | 2 | 22.22%
joe perches | 17 | 13.71% | 2 | 22.22%
matt mackall | 3 | 2.42% | 1 | 11.11%
eric w. biederman | 3 | 2.42% | 1 | 11.11%
harvey harrison | 2 | 1.61% | 1 | 11.11%
jeff moyer | 1 | 0.81% | 1 | 11.11%
Total | 124 | 100.00% | 9 | 100.00%

EXPORT_SYMBOL(netpoll_print_options);
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) { const char *end; if (!strchr(str, ':') && in4_pton(str, -1, (void *)addr, -1, &end) > 0) { if (!*end) return 0; } if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { #if IS_ENABLED(CONFIG_IPV6) if (!*end) return 1; #else return -1; #endif } return -1; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
americo wang | 117 | 100.00% | 1 | 100.00%
Total | 117 | 100.00% | 1 | 100.00%


/*
 * Parse a netpoll configuration string into @np.
 *
 * As read by the code below, @opt has the form
 *
 *	[local-port]@[local-ip]/[dev-name],[remote-port]@<remote-ip>/[remote-mac]
 *
 * where bracketed fields may be left empty, in which case the matching
 * field of @np is left untouched.  @opt is modified in place: each
 * delimiter that ends a parsed field is overwritten with a NUL byte.
 *
 * Returns 0 on success, after logging the resulting configuration, or -1
 * on a parse error, after logging the position at which parsing stopped.
 */
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;
	int ipv6;
	/* Set once a local address has fixed the address family. */
	bool ipversion_set = false;

	/* Optional local port, terminated by '@'. */
	if (*cur != '@') {
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* Optional local IP address, terminated by '/'. */
	if (*cur != '/') {
		ipversion_set = true;
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
		*delim = 0;
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
		cur = delim;
	}
	cur++;

	if (*cur != ',') {
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
		*delim = 0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur = delim;
	}
	cur++;

	if (*cur != '@') {
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		/* Only warns; the kstrtou16() call below decides success. */
		if (*cur == ' ' || *cur == '\t')
			np_info(np, "warning: whitespace is not allowed\n");
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
	*delim = 0;
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
	/* Local and remote addresses must be of the same family. */
	else if (ipversion_set && np->ipv6 != (bool)ipv6)
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
	cur = delim + 1;

	if (*cur != 0) {
		/* MAC address */
		if (!mac_pton(cur, np->remote_mac))
			goto parse_failed;
	}

	netpoll_print_options(np);

	return 0;

 parse_failed:
	/* cur points at the start of the field that failed to parse. */
	np_info(np, "couldn't parse config at '%s'!\n", cur);
	return -1;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
matt mackall | 284 | 67.78% | 1 | 12.50%
americo wang | 86 | 20.53% | 2 | 25.00%
abhijit pawar | 29 | 6.92% | 1 | 12.50%
sabrina dubroca | 11 | 2.63% | 1 | 12.50%
joe perches | 6 | 1.43% | 1 | 12.50%
alexey dobriyan | 2 | 0.48% | 1 | 12.50%
satyam sharma | 1 | 0.24% | 1 | 12.50%
Total | 419 | 100.00% | 8 | 100.00%

EXPORT_SYMBOL(netpoll_parse_options);
int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; const struct net_device_ops *ops; int err; np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { np_err(np, "%s doesn't support polling, aborting\n", np->dev_name); err = -ENOTSUPP; goto out; } if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; goto out; } sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); atomic_set(&npinfo->refcnt, 1); ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { err = ops->ndo_netpoll_setup(ndev, npinfo); if (err) goto