cregit-Linux how code gets into the kernel

Release 4.11 net/sched/cls_u32.c

Directory: net/sched
/*
 * net/sched/cls_u32.c  Ugly (or Universal) 32bit key Packet Classifier.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *      The filters are packed to hash tables of key nodes
 *      with a set of 32bit key/mask pairs at every node.
 *      Nodes reference next level hash tables etc.
 *
 *      This scheme is the best universal classifier I managed to
 *      invent; it is not super-fast, but it is not slow (provided you
 *      program it correctly), and general enough.  And its relative
 *      speed grows as the number of rules becomes larger.
 *
 *      It seems that it represents the best middle point between
 *      speed and manageability both by human and by machine.
 *
 *      It is especially useful for link sharing combined with QoS;
 *      pure RSVP doesn't need such a general approach and can use
 *      much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *      JHS: We should remove the CONFIG_NET_CLS_IND from here
 *      eventually when the meta match extension is made available
 *
 *      nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/netdevice.h>


/* A single u32 filter (key node). Key nodes hang off the buckets of a
 * tc_u_hnode; 'sel' carries the variable-length key/mask array and so
 * must remain the last member.
 */
struct tc_u_knode {
	struct tc_u_knode __rcu	*next;		/* next key in the same hash bucket */
	u32			handle;
	struct tc_u_hnode __rcu	*ht_up;		/* hash table this key lives in */
	struct tcf_exts		exts;		/* actions executed on match */
#ifdef CONFIG_NET_CLS_IND
	int			ifindex;	/* compared by tcf_match_indev() */
#endif
	u8			fshift;		/* shift used by u32_hash_fold() */
	struct tcf_result	res;		/* classification result on match */
	struct tc_u_hnode __rcu	*ht_down;	/* linked (next-level) table, if any */
#ifdef CONFIG_CLS_U32_PERF
	struct tc_u32_pcnt __percpu *pf;	/* per-cpu hit counters */
#endif
	u32			flags;
#ifdef CONFIG_CLS_U32_MARK
	u32			val;		/* skb->mark must equal val under mask */
	u32			mask;
	u32 __percpu		*pcpu_success;
#endif
	struct tcf_proto	*tp;
	struct rcu_head		rcu;
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
};


/* A u32 hash table (hnode). The root table of each tcf_proto and any
 * tables created by "link" filters are chained on tc_u_common::hlist.
 * The ht[] bucket array is over-allocated so it holds divisor + 1
 * entries (see the <= divisor loops in the iteration helpers).
 */
struct tc_u_hnode {
	struct tc_u_hnode __rcu	*next;		/* next table on tp_c->hlist */
	u32			handle;		/* hash table id (htid) */
	u32			prio;
	struct tc_u_common	*tp_c;		/* back-pointer to shared state */
	int			refcnt;
	unsigned int		divisor;	/* bucket count is divisor + 1 */
	struct rcu_head		rcu;
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
};


/* State shared by every u32 tcf_proto instance attached to one qdisc:
 * the list of hash tables and the generator used to hand out fresh
 * hash-table ids.
 */
struct tc_u_common {
	struct tc_u_hnode __rcu	*hlist;		/* all hash tables on this qdisc */
	struct Qdisc		*q;		/* owning qdisc */
	int			refcnt;		/* one per attached tcf_proto */
	u32			hgenerator;	/* last handed-out htid fragment */
	struct rcu_head		rcu;
};


static inline unsigned int u32_hash_fold(__be32 key, const struct tc_u32_sel *sel, u8 fshift) { unsigned int h = ntohl(key & sel->hmask) >> fshift; return h; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)2565.79%116.67%
Jamal Hadi Salim513.16%233.33%
Eric Dumazet410.53%116.67%
Radu Rendec37.89%116.67%
Al Viro12.63%116.67%
Total38100.00%6100.00%


/* Main u32 classification loop. Walks the hash-table hierarchy starting
 * at tp->root, matching each key node's 32-bit key/mask pairs against
 * packet data at the current offset. A node with a ht_down link pushes
 * itself onto a small fixed stack and descends into the linked table;
 * when a table is exhausted the walk pops back up and re-checks the
 * pushed node for a terminal result. Returns the action verdict on a
 * terminal match, or -1 when nothing matched (or data was unavailable).
 * Called from the RCU-bh read side of the qdisc fast path.
 */
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
{
	struct {
		struct tc_u_knode *knode;
		unsigned int off;
	} stack[TC_U32_MAXDEPTH];

	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
	unsigned int off = skb_network_offset(skb);
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
	n = rcu_dereference_bh(ht->ht[sel]);

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
		__this_cpu_inc(n->pf->rcnt);
		j = 0;
#endif
		/* Software-skipped filters are only evaluated in hardware. */
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

#ifdef CONFIG_CLS_U32_MARK
		if ((skb->mark & n->mask) != n->val) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		} else {
			__this_cpu_inc(*n->pcpu_success);
		}
#endif

		/* Compare every key/mask pair of this node against the packet. */
		for (i = n->sel.nkeys; i > 0; i--, key++) {
			int toff = off + key->off + (off2 & key->offmask);
			__be32 *data, hdata;

			/* Guards against a negative/overflowed offset computed
			 * from a bogus variable-offset filter.
			 */
			if (skb_headroom(skb) + toff > INT_MAX)
				goto out;

			data = skb_header_pointer(skb, toff, 4, &hdata);
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
				n = rcu_dereference_bh(n->next);
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
			__this_cpu_inc(n->pf->kcnts[j]);
			j++;
#endif
		}

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
check_terminal:
			if (n->sel.flags & TC_U32_TERMINAL) {
				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
				if (!tcf_match_indev(skb, n->ifindex)) {
					n = rcu_dereference_bh(n->next);
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
				__this_cpu_inc(n->pf->rhit);
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
					n = rcu_dereference_bh(n->next);
					goto next_knode;
				}

				return r;
			}
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

		/* PUSH: descend into the linked hash table. */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
		stack[sdepth].off = off;
		sdepth++;

		ht = rcu_dereference_bh(n->ht_down);
		sel = 0;
		if (ht->divisor) {
			__be32 *data, hdata;

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
						  &hdata);
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
			goto next_ht;

		/* Compute the (possibly packet-derived) offset adjustment. */
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
			off2 = n->sel.off + 3;
			if (n->sel.flags & TC_U32_VAROFFSET) {
				__be16 *data, hdata;

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
							  2, &hdata);
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
			off2 &= ~3;
		}
		if (n->sel.flags & TC_U32_EAT) {
			off += off2;
			off2 = 0;
		}

		if (off < skb->len)
			goto next_ht;
	}

	/* POP: back up one level and re-check the pushed node. */
	if (sdepth--) {
		n = stack[sdepth].knode;
		ht = rcu_dereference_bh(n->ht_up);
		off = stack[sdepth].off;
		goto check_terminal;
	}
out:
	return -1;

deadloop:
	net_warn_ratelimited("cls_u32: dead loop\n");
	return -1;
}

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)41552.80%29.09%
Changli Gao11614.76%14.55%
Jamal Hadi Salim8610.94%313.64%
John Fastabend516.49%29.09%
Catalin(ux aka Dino) M. Boie374.71%14.55%
Stephen Hemminger253.18%29.09%
Thomas Graf232.93%313.64%
Sridhar Samudrala232.93%14.55%
Dan Carpenter20.25%14.55%
David S. Miller20.25%14.55%
Arnaldo Carvalho de Melo20.25%14.55%
Joe Perches10.13%14.55%
Linus Torvalds10.13%14.55%
Américo Wang10.13%14.55%
Eric Dumazet10.13%14.55%
Total786100.00%22100.00%


static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; for (ht = rtnl_dereference(tp_c->hlist); ht; ht = rtnl_dereference(ht->next)) if (ht->handle == handle) break; return ht; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)3360.00%133.33%
Eric Dumazet1629.09%133.33%
John Fastabend610.91%133.33%
Total55100.00%3100.00%


static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; sel = TC_U32_HASH(handle); if (sel > ht->divisor) goto out; for (n = rtnl_dereference(ht->ht[sel]); n; n = rtnl_dereference(n->next)) if (n->handle == handle) break; out: return n; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)6880.95%125.00%
Arnaldo Carvalho de Melo89.52%125.00%
John Fastabend67.14%125.00%
Eric Dumazet22.38%125.00%
Total84100.00%4100.00%


static unsigned long u32_get(struct tcf_proto *tp, u32 handle) { struct tc_u_hnode *ht; struct tc_u_common *tp_c = tp->data; if (TC_U32_HTID(handle) == TC_U32_ROOT) ht = rtnl_dereference(tp->root); else ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); if (!ht) return 0; if (TC_U32_KEY(handle) == 0) return (unsigned long)ht; return (unsigned long)u32_lookup_key(ht, handle); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)9396.88%150.00%
John Fastabend33.12%150.00%
Total96100.00%2100.00%


static u32 gen_new_htid(struct tc_u_common *tp_c) { int i = 0x800; /* hgenerator only used inside rtnl lock it is safe to increment * without read _copy_ update semantics */ do { if (++tp_c->hgenerator == 0x7FF) tp_c->hgenerator = 1; } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)7397.33%133.33%
Linus Torvalds11.33%133.33%
John Fastabend11.33%133.33%
Total75100.00%3100.00%


static int u32_init(struct tcf_proto *tp) { struct tc_u_hnode *root_ht; struct tc_u_common *tp_c; tp_c = tp->q->u32_node; root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) return -ENOBUFS; root_ht->refcnt++; root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; root_ht->prio = tp->prio; if (tp_c == NULL) { tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); if (tp_c == NULL) { kfree(root_ht); return -ENOBUFS; } tp_c->q = tp->q; tp->q->u32_node = tp_c; } tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); root_ht->tp_c = tp_c; rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)15283.98%120.00%
John Fastabend126.63%120.00%
Patrick McHardy84.42%120.00%
David S. Miller73.87%120.00%
Panagiotis Issaris21.10%120.00%
Total181100.00%5100.00%


static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF if (free_pf) free_percpu(n->pf); #endif #ifdef CONFIG_CLS_U32_MARK if (free_pf) free_percpu(n->pcpu_success); #endif kfree(n); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)4151.25%114.29%
John Fastabend2430.00%342.86%
Jamal Hadi Salim1215.00%228.57%
Thomas Graf33.75%114.29%
Total80100.00%7100.00%

/* u32_delete_key_rcu should be called when free'ing a copied * version of a tc_u_knode obtained from u32_init_knode(). When * copies are obtained from u32_init_knode() the statistics are * shared between the old and new copies to allow readers to * continue to update the statistics during the copy. To support * this the u32_delete_key_rcu variant does not free the percpu * statistics. */
static void u32_delete_key_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); u32_destroy_key(key->tp, key, false); }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend37100.00%2100.00%
Total37100.00%2100.00%

/* u32_delete_key_freepf_rcu is the rcu callback variant * that free's the entire structure including the statistics * percpu variables. Only use this if the key is not a copy * returned by u32_init_knode(). See u32_delete_key_rcu() * for the variant that should be used with keys return from * u32_init_knode() */
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); u32_destroy_key(key->tp, key, true); }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend37100.00%2100.00%
Total37100.00%2100.00%


static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { struct tc_u_knode __rcu **kp; struct tc_u_knode *pkp; struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); if (ht) { kp = &ht->ht[TC_U32_HASH(key->handle)]; for (pkp = rtnl_dereference(*kp); pkp; kp = &pkp->next, pkp = rtnl_dereference(*kp)) { if (pkp == key) { RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } } } WARN_ON(1); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)9064.75%337.50%
John Fastabend3726.62%337.50%
Américo Wang107.19%112.50%
Ilpo Järvinen21.44%112.50%
Total139100.00%8100.00%


static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); } }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend10398.10%375.00%
Daniel Borkmann21.90%125.00%
Total105100.00%4100.00%


static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; int err; if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; offload.cls_u32->command = TC_CLSU32_NEW_HNODE; offload.cls_u32->hnode.divisor = h->divisor; offload.cls_u32->hnode.handle = h->handle; offload.cls_u32->hnode.prio = h->prio; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); if (tc_skip_sw(flags)) return err; return 0; }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend12274.39%240.00%
Jakub Kiciński2112.80%120.00%
Sridhar Samudrala1911.59%120.00%
Daniel Borkmann21.22%120.00%
Total164100.00%5100.00%


static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; offload.cls_u32->hnode.divisor = h->divisor; offload.cls_u32->hnode.handle = h->handle; offload.cls_u32->hnode.prio = h->prio; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); } }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend13198.50%375.00%
Daniel Borkmann21.50%125.00%
Total133100.00%4100.00%


static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; int err; offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; offload.cls_u32->knode.handle = n->handle; offload.cls_u32->knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK offload.cls_u32->knode.val = n->val; offload.cls_u32->knode.mask = n->mask; #else offload.cls_u32->knode.val = 0; offload.cls_u32->knode.mask = 0; #endif offload.cls_u32->knode.sel = &n->sel; offload.cls_u32->knode.exts = &n->exts; if (n->ht_down) offload.cls_u32->knode.link_handle = n->ht_down->handle; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); if (!err) n->flags |= TCA_CLS_FLAGS_IN_HW; if (tc_skip_sw(flags)) return err; return 0; }

Contributors

PersonTokensPropCommitsCommitProp
John Fastabend21683.08%342.86%
Sridhar Samudrala197.31%114.29%
Jakub Kiciński124.62%114.29%
Or Gerlitz114.23%114.29%
Daniel Borkmann20.77%114.29%
Total260100.00%7100.00%


static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_knode *n; unsigned int h; for (h = 0; h <= ht->divisor; h++) { while ((n = rtnl_dereference(ht->ht[h])) != NULL) { RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)6762.04%116.67%
John Fastabend2523.15%350.00%
Américo Wang1513.89%116.67%
Eric Dumazet10.93%116.67%
Total108100.00%6100.00%


static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; WARN_ON(ht->refcnt); u32_clear_hnode(tp, ht); hn = &tp_c->hlist; for (phn = rtnl_dereference(*hn); phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { u32_clear_hw_hnode(tp, ht); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; } } return -ENOENT; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)8365.87%120.00%
John Fastabend4031.75%240.00%
Américo Wang21.59%120.00%
Ilpo Järvinen10.79%120.00%
Total126100.00%5100.00%


static bool ht_empty(struct tc_u_hnode *ht) { unsigned int h; for (h = 0; h <= ht->divisor; h++) if (rcu_access_pointer(ht->ht[h])) return false; return true; }

Contributors

PersonTokensPropCommitsCommitProp
Cong Wang48100.00%1100.00%
Total48100.00%1100.00%


static bool u32_destroy(struct tcf_proto *tp, bool force) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); WARN_ON(root_ht == NULL); if (!force) { if (root_ht) { if (root_ht->refcnt > 1) return false; if (root_ht->refcnt == 1) { if (!ht_empty(root_ht)) return false; } } if (tp_c->refcnt > 1) return false; if (tp_c->refcnt == 1) { struct tc_u_hnode *ht; for (ht = rtnl_dereference(tp_c->hlist); ht; ht = rtnl_dereference(ht->next)) if (!ht_empty(ht)) return false; } } if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; tp->q->u32_node = NULL; for (ht = rtnl_dereference(tp_c->hlist); ht; ht = rtnl_dereference(ht->next)) { ht->refcnt--; u32_clear_hnode(tp, ht); } while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { RCU_INIT_POINTER(tp_c->hlist, ht->next); kfree_rcu(ht, rcu); } kfree(tp_c); } tp->data = NULL; return true; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)12846.89%112.50%
Américo Wang6122.34%225.00%
Cong Wang5219.05%112.50%
John Fastabend186.59%112.50%
Jarek Poplawski72.56%112.50%
David S. Miller51.83%112.50%
Ilpo Järvinen20.73%112.50%
Total273100.00%8100.00%


static int u32_delete(struct tcf_proto *tp