cregit-Linux: how code gets into the kernel

Release 4.14 net/sched/cls_u32.c

Directory: net/sched
/*
 * net/sched/cls_u32.c  Ugly (or Universal) 32bit key Packet Classifier.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *      The filters are packed to hash tables of key nodes
 *      with a set of 32bit key/mask pairs at every node.
 *      Nodes reference next level hash tables etc.
 *
 *      This scheme is the best universal classifier I managed to
 *      invent; it is not super-fast, but it is not slow (provided you
 *      program it correctly), and general enough.  And its relative
 *      speed grows as the number of rules becomes larger.
 *
 *      It seems that it represents the best middle point between
 *      speed and manageability both by human and by machine.
 *
 *      It is especially useful for link sharing combined with QoS;
 *      pure RSVP doesn't need such a general approach and can use
 *      much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *      JHS: We should remove the CONFIG_NET_CLS_IND from here
 *      eventually when the meta match extension is made available
 *
 *      nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
#include <linux/netdevice.h>
#include <linux/hash.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/netdevice.h>


struct tc_u_knode {
	
struct tc_u_knode __rcu	*next;
	
u32			handle;
	
struct tc_u_hnode __rcu	*ht_up;
	
struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
	
int			ifindex;
#endif
	
u8			fshift;
	
struct tcf_result	res;
	
struct tc_u_hnode __rcu	*ht_down;
#ifdef CONFIG_CLS_U32_PERF
	
struct tc_u32_pcnt __percpu *pf;
#endif
	
u32			flags;
#ifdef CONFIG_CLS_U32_MARK
	
u32			val;
	
u32			mask;
	
u32 __percpu		*pcpu_success;
#endif
	
struct tcf_proto	*tp;
	union {
		
struct work_struct	work;
		
struct rcu_head		rcu;
	};
	/* The 'sel' field MUST be the last field in structure to allow for
         * tc_u32_keys allocated at end of structure.
         */
	
struct tc_u32_sel	sel;
};


struct tc_u_hnode {
	
struct tc_u_hnode __rcu	*next;
	
u32			handle;
	
u32			prio;
	
struct tc_u_common	*tp_c;
	
int			refcnt;
	
unsigned int		divisor;
	
struct rcu_head		rcu;
	/* The 'ht' field MUST be the last field in structure to allow for
         * more entries allocated at end of structure.
         */
	
struct tc_u_knode __rcu	*ht[1];
};


struct tc_u_common {
	
struct tc_u_hnode __rcu	*hlist;
	
struct Qdisc		*q;
	
int			refcnt;
	
u32			hgenerator;
	
struct hlist_node	hnode;
	
struct rcu_head		rcu;
};


static inline unsigned int u32_hash_fold(__be32 key, const struct tc_u32_sel *sel, u8 fshift) { unsigned int h = ntohl(key & sel->hmask) >> fshift; return h; }

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Linus Torvalds (pre-git)    25      65.79%   1        16.67%
Jamal Hadi Salim            5       13.16%   2        33.33%
Eric Dumazet                4       10.53%   1        16.67%
Radu Rendec                 3       7.89%    1        16.67%
Al Viro                     1       2.63%    1        16.67%
Total                       38      100.00%  6        100.00%


static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct { struct tc_u_knode *knode; unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = rcu_dereference_bh(tp->root); unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; int sel = 0; #ifdef CONFIG_CLS_U32_PERF int j; #endif int i, r; next_ht: n = rcu_dereference_bh(ht->ht[sel]); next_knode: if (n) { struct tc_u32_key *key = n->sel.keys; #ifdef CONFIG_CLS_U32_PERF __this_cpu_inc(n->pf->rcnt); j = 0; #endif if (tc_skip_sw(n->flags)) { n = rcu_dereference_bh(n->next); goto next_knode; } #ifdef CONFIG_CLS_U32_MARK if ((skb->mark & n->mask) != n->val) { n = rcu_dereference_bh(n->next); goto next_knode; } else { __this_cpu_inc(*n->pcpu_success); } #endif for (i = n->sel.nkeys; i > 0; i--, key++) { int toff = off + key->off + (off2 & key->offmask); __be32 *data, hdata; if (skb_headroom(skb) + toff > INT_MAX) goto out; data = skb_header_pointer(skb, toff, 4, &hdata); if (!data) goto out; if ((*data ^ key->val) & key->mask) { n = rcu_dereference_bh(n->next); goto next_knode; } #ifdef CONFIG_CLS_U32_PERF __this_cpu_inc(n->pf->kcnts[j]); j++; #endif } ht = rcu_dereference_bh(n->ht_down); if (!ht) { check_terminal: if (n->sel.flags & TC_U32_TERMINAL) { *res = n->res; #ifdef CONFIG_NET_CLS_IND if (!tcf_match_indev(skb, n->ifindex)) { n = rcu_dereference_bh(n->next); goto next_knode; } #endif #ifdef CONFIG_CLS_U32_PERF __this_cpu_inc(n->pf->rhit); #endif r = tcf_exts_exec(skb, &n->exts, res); if (r < 0) { n = rcu_dereference_bh(n->next); goto next_knode; } return r; } n = rcu_dereference_bh(n->next); goto next_knode; } /* PUSH */ if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; stack[sdepth].off = off; sdepth++; ht = rcu_dereference_bh(n->ht_down); sel = 0; if (ht->divisor) { __be32 *data, hdata; data = skb_header_pointer(skb, off + n->sel.hoff, 4, &hdata); if (!data) goto out; sel = ht->divisor & 
u32_hash_fold(*data, &n->sel, n->fshift); } if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) goto next_ht; if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; if (n->sel.flags & TC_U32_VAROFFSET) { __be16 *data, hdata; data = skb_header_pointer(skb, off + n->sel.offoff, 2, &hdata); if (!data) goto out; off2 += ntohs(n->sel.offmask & *data) >> n->sel.offshift; } off2 &= ~3; } if (n->sel.flags & TC_U32_EAT) { off += off2; off2 = 0; } if (off < skb->len) goto next_ht; } /* POP */ if (sdepth--) { n = stack[sdepth].knode; ht = rcu_dereference_bh(n->ht_up); off = stack[sdepth].off; goto check_terminal; } out: return -1; deadloop: net_warn_ratelimited("cls_u32: dead loop\n"); return -1; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)41552.80%29.09%
Changli Gao11614.76%14.55%
Jamal Hadi Salim8610.94%313.64%
John Fastabend516.49%29.09%
Catalin(ux aka Dino) M. Boie374.71%14.55%
Stephen Hemminger253.18%29.09%
Thomas Graf232.93%313.64%
Sridhar Samudrala232.93%14.55%
David S. Miller20.25%14.55%
Dan Carpenter20.25%14.55%
Arnaldo Carvalho de Melo20.25%14.55%
Américo Wang10.13%14.55%
Linus Torvalds10.13%14.55%
Eric Dumazet10.13%14.55%
Joe Perches10.13%14.55%
Total786100.00%22100.00%


static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; for (ht = rtnl_dereference(tp_c->hlist); ht; ht = rtnl_dereference(ht->next)) if (ht->handle == handle) break; return ht; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)3360.00%133.33%
Eric Dumazet1629.09%133.33%
John Fastabend610.91%133.33%
Total55100.00%3100.00%


static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; sel = TC_U32_HASH(handle); if (sel > ht->divisor) goto out; for (n = rtnl_dereference(ht->ht[sel]); n; n = rtnl_dereference(n->next)) if (n->handle == handle) break; out: return n; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)6880.95%125.00%
Arnaldo Carvalho de Melo89.52%125.00%
John Fastabend67.14%125.00%
Eric Dumazet22.38%125.00%
Total84100.00%4100.00%


static void *u32_get(struct tcf_proto *tp, u32 handle) { struct tc_u_hnode *ht; struct tc_u_common *tp_c = tp->data; if (TC_U32_HTID(handle) == TC_U32_ROOT) ht = rtnl_dereference(tp->root); else ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); if (!ht) return NULL; if (TC_U32_KEY(handle) == 0) return ht; return u32_lookup_key(ht, handle); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)8293.18%133.33%
Américo Wang33.41%133.33%
John Fastabend33.41%133.33%
Total88100.00%3100.00%


static u32 gen_new_htid(struct tc_u_common *tp_c) { int i = 0x800; /* hgenerator only used inside rtnl lock it is safe to increment * without read _copy_ update semantics */ do { if (++tp_c->hgenerator == 0x7FF) tp_c->hgenerator = 1; } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)7397.33%133.33%
John Fastabend11.33%133.33%
Linus Torvalds11.33%133.33%
Total75100.00%3100.00%

static struct hlist_head *tc_u_common_hash; #define U32_HASH_SHIFT 10 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
static unsigned int tc_u_hash(const struct tcf_proto *tp) { struct net_device *dev = tp->q->dev_queue->dev; u32 qhandle = tp->q->handle; int ifindex = dev->ifindex; return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT); }

Contributors

PersonTokensPropCommitsCommitProp
Américo Wang57100.00%1100.00%
Total57100.00%1100.00%


static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) { struct tc_u_common *tc; unsigned int h; h = tc_u_hash(tp); hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { if (tc->q == tp->q) return tc; } return NULL; }

Contributors

PersonTokensPropCommitsCommitProp
Américo Wang56100.00%1100.00%
Total56100.00%1100.00%


static int u32_init(struct tcf_proto *tp) { struct tc_u_hnode *root_ht; struct tc_u_common *tp_c; unsigned int h; tp_c = tc_u_common_find(tp); root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) return -ENOBUFS; root_ht->refcnt++; root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; root_ht->prio = tp->prio; if (tp_c == NULL) { tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); if (tp_c == NULL) { kfree(root_ht); return -ENOBUFS; } tp_c->q = tp->q; INIT_HLIST_NODE(&tp_c->hnode); h = tc_u_hash(tp); hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); } tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); root_ht->tp_c = tp_c; rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)14972.68%116.67%
Américo Wang3215.61%116.67%
John Fastabend125.85%116.67%
Patrick McHardy83.90%116.67%
David S. Miller20.98%116.67%
Panagiotis Issaris20.98%116.67%
Total205100.00%6100.00%


static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); tcf_exts_put_net(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF if (free_pf) free_percpu(n->pf); #endif #ifdef CONFIG_CLS_U32_MARK if (free_pf) free_percpu(n->pcpu_success); #endif kfree(n); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)4146.59%112.50%
John Fastabend2427.27%337.50%
Jamal Hadi Salim1213.64%225.00%
Américo Wang89.09%112.50%
Thomas Graf33.41%112.50%
Total88100.00%8100.00%

/* u32_delete_key_rcu should be called when free'ing a copied * version of a tc_u_knode obtained from u32_init_knode(). When * copies are obtained from u32_init_knode() the statistics are * shared between the old and new copies to allow readers to * continue to update the statistics during the copy. To support * this the u32_delete_key_rcu variant does not free the percpu * statistics. */
static void u32_delete_key_work(struct work_struct *work) { struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); rtnl_lock(); u32_destroy_key(key->tp, key, false); rtnl_unlock(); }

Contributors

PersonTokensPropCommitsCommitProp
Américo Wang43100.00%1100.00%
Total43100.00%1100.00%


static void u32_delete_key_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); INIT_WORK(&key->work, u32_delete_key_work); tcf_queue_work(&key->work); }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend3375.00%266.67%
Américo Wang1125.00%133.33%
Total44100.00%3100.00%

/* u32_delete_key_freepf_rcu is the rcu callback variant * that free's the entire structure including the statistics * percpu variables. Only use this if the key is not a copy * returned by u32_init_knode(). See u32_delete_key_rcu() * for the variant that should be used with keys return from * u32_init_knode() */
static void u32_delete_key_freepf_work(struct work_struct *work) { struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); rtnl_lock(); u32_destroy_key(key->tp, key, true); rtnl_unlock(); }

Contributors

PersonTokensPropCommitsCommitProp
Américo Wang43100.00%1100.00%
Total43100.00%1100.00%


static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); INIT_WORK(&key->work, u32_delete_key_freepf_work); tcf_queue_work(&key->work); }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend3375.00%266.67%
Américo Wang1125.00%133.33%
Total44100.00%3100.00%


static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { struct tc_u_knode __rcu **kp; struct tc_u_knode *pkp; struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); if (ht) { kp = &ht->ht[TC_U32_HASH(key->handle)]; for (pkp = rtnl_dereference(*kp); pkp; kp = &pkp->next, pkp = rtnl_dereference(*kp)) { if (pkp == key) { RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); tcf_exts_get_net(&key->exts); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } } } WARN_ON(1); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)9061.22%333.33%
John Fastabend3725.17%333.33%
Américo Wang1812.24%222.22%
Ilpo Järvinen21.36%111.11%
Total147100.00%9100.00%


static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload cls_u32 = {}; if (!tc_should_offload(dev, 0)) return; tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend6376.83%350.00%
Jiri Pirko1923.17%350.00%
Total82100.00%6100.00%


static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload cls_u32 = {}; int err; if (!tc_should_offload(dev, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_NEW_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); if (tc_skip_sw(flags)) return err; return 0; }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend7856.93%228.57%
Jakub Kiciński2115.33%114.29%
Jiri Pirko1913.87%342.86%
Sridhar Samudrala1913.87%114.29%
Total137100.00%7100.00%


static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload cls_u32 = {}; if (!tc_should_offload(dev, 0)) return; tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend8580.19%350.00%
Jiri Pirko2119.81%350.00%
Total106100.00%6100.00%


static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload cls_u32 = {}; int err; if (!tc_should_offload(dev, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_REPLACE_KNODE; cls_u32.knode.handle = n->handle; cls_u32.knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK cls_u32.knode.val = n->val; cls_u32.knode.mask = n->mask; #else cls_u32.knode.val = 0; cls_u32.knode.mask = 0; #endif cls_u32.knode.sel = &n->sel; cls_u32.knode.exts = &n->exts; if (n->ht_down) cls_u32.knode.link_handle = n->ht_down->handle; err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); if (!err) n->flags |= TCA_CLS_FLAGS_IN_HW; if (tc_skip_sw(flags)) return err; return 0; }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend15469.68%333.33%
Jiri Pirko2511.31%333.33%
Sridhar Samudrala198.60%111.11%
Jakub Kiciński125.43%111.11%
Or Gerlitz114.98%111.11%
Total221100.00%9100.00%


static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_knode *n; unsigned int h; for (h = 0; h <= ht->divisor; h++) { while ((n = rtnl_dereference(ht->ht[h])) != NULL) { RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); if (tcf_exts_get_net(&n->exts)) call_rcu(&n->rcu, u32_delete_key_freepf_rcu); else u32_destroy_key(n->tp, n, true); } } }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)6751.54%114.29%
Américo Wang3728.46%228.57%
John Fastabend2519.23%342.86%
Eric Dumazet10.77%114.29%
Total130100.00%7100.00%


static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; WARN_ON(ht->refcnt); u32_clear_hnode(tp, ht); hn = &tp_c->hlist; for (phn = rtnl_dereference(*hn); phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { u32_clear_hw_hnode(tp, ht); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; } } return -ENOENT; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)8365.87%120.00%
John Fastabend4031.75%240.00%
Américo Wang21.59%120.00%
Ilpo Järvinen10.79%120.00%
Total126100.00%5100.00%


static bool ht_empty(struct tc_u_hnode *ht) { unsigned int h; for (h = 0; h <= ht->divisor; h++) if (rcu_access_pointer(ht->ht[h])) return false; return true; }

Contributors

PersonTokensPropCommitsCommitProp
Cong Wang48100.00%1100.00%
Total48100.00%1100.00%


static void u32_destroy(struct tcf_proto *tp) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); WARN_ON(root_ht == NULL); if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; hlist_del(&tp_c->hnode); for (ht = rtnl_dereference(tp_c->hlist); ht;