Release 4.11 net/netfilter/nfnetlink_queue.c
/*
* This is a module which is used for queueing packets and communicating with
* userspace via nfnetlink.
*
* (C) 2005 by Harald Welte <laforge@netfilter.org>
* (C) 2007 by Patrick McHardy <kaber@trash.net>
*
* Based on the old ipv4-only ip_queue.c:
* (C) 2000-2002 James Morris <jmorris@intercode.com.au>
* (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/netfilter/nf_queue.h>
#include <net/netns/generic.h>
#include <linux/atomic.h>
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
#endif
#define NFQNL_QMAX_DEFAULT 1024
/* We're using struct nlattr, which has a 16-bit nla_len. Note that nla_len
 * includes the header length. Thus, the maximum packet length that we
 * support is 65531 bytes. We send truncated packets if the specified length
 * is larger than that. Userspace can check for the presence of the
 * NFQA_CAP_LEN attribute to detect truncation.
 */
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
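/* Worked arithmetic behind the limit quoted above:
 * NLA_HDRLEN == NLA_ALIGN(sizeof(struct nlattr)) == 4, so
 * NFQNL_MAX_COPY_RANGE == 0xffff - 4 == 65531 bytes.
 */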
struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
u32 peer_portid;
unsigned int queue_maxlen;
unsigned int copy_range;
unsigned int queue_dropped;
unsigned int queue_user_dropped;
u_int16_t queue_num; /* number of this queue */
u_int8_t copy_mode;
u_int32_t flags; /* Set using NFQA_CFG_FLAGS */
/*
 * The following fields are dirtied for each queued packet;
 * keep them in the same cache line if possible.
 */
spinlock_t lock ____cacheline_aligned_in_smp;
unsigned int queue_total;
unsigned int id_sequence; /* 'sequence' of pkt ids */
struct list_head queue_list; /* packets in queue */
};
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
static unsigned int nfnl_queue_net_id __read_mostly;
#define INSTANCE_BUCKETS 16
struct nfnl_queue_net {
spinlock_t instances_lock;
struct hlist_head instance_table[INSTANCE_BUCKETS];
};
static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
{
return net_generic(net, nfnl_queue_net_id);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gao Feng | 21 | 100.00% | 1 | 100.00% |
Total | 21 | 100.00% | 1 | 100.00% |
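The nfnl_queue_net_id key and struct nfnl_queue_net follow the standard net_generic() pattern: at registration the netns core allocates a per-namespace blob of .size bytes and records its slot index in .id. A minimal sketch of that registration, assuming init/exit callbacks named nfnl_queue_net_init/nfnl_queue_net_exit (they live later in this file, outside this excerpt):

static struct pernet_operations nfnl_queue_net_ops = {
	.init	= nfnl_queue_net_init,	/* initializes instances_lock and instance_table */
	.exit	= nfnl_queue_net_exit,
	.id	= &nfnl_queue_net_id,	/* slot index filled in at registration */
	.size	= sizeof(struct nfnl_queue_net),
};

/* from the module init path: */
register_pernet_subsys(&nfnl_queue_net_ops);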
static inline u_int8_t instance_hashfn(u_int16_t queue_num)
{
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 22 | 95.65% | 1 | 50.00% |
Pablo Neira Ayuso | 1 | 4.35% | 1 | 50.00% |
Total | 23 | 100.00% | 2 | 100.00% |
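instance_hashfn() folds the high byte of the queue number into the low byte before taking the bucket index, so queue numbers that differ only in their high byte still spread across buckets. A quick worked example with INSTANCE_BUCKETS == 16:

/* queue_num = 0x0102:
 *   (0x0102 >> 8) ^ 0x0102 == 0x0001 ^ 0x0102 == 0x0103
 *   0x0103 % 16 == 3
 * queue_num = 0x0002:
 *   (0x0002 >> 8) ^ 0x0002 == 0x0002,  0x0002 % 16 == 2
 */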
static struct nfqnl_instance *
instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
{
struct hlist_head *head;
struct nfqnl_instance *inst;
head = &q->instance_table[instance_hashfn(queue_num)];
hlist_for_each_entry_rcu(inst, head, hlist) {
if (inst->queue_num == queue_num)
return inst;
}
return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 54 | 85.71% | 1 | 33.33% |
Gao Feng | 7 | 11.11% | 1 | 33.33% |
Patrick McHardy | 2 | 3.17% | 1 | 33.33% |
Total | 63 | 100.00% | 3 | 100.00% |
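instance_lookup() traverses the bucket with hlist_for_each_entry_rcu(), so a caller must either hold q->instances_lock (as instance_create() does below) or be inside an RCU read-side critical section. A minimal sketch of the lockless caller, with the surrounding packet-path code assumed:

rcu_read_lock();
queue = instance_lookup(q, queue_num);
if (queue) {
	/* the instance is freed via call_rcu() (see instance_destroy_rcu()
	 * below), so it remains valid until rcu_read_unlock() */
}
rcu_read_unlock();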
static struct nfqnl_instance *
instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
{
struct nfqnl_instance *inst;
unsigned int h;
int err;
spin_lock(&q->instances_lock);
if (instance_lookup(q, queue_num)) {
err = -EEXIST;
goto out_unlock;
}
inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
if (!inst) {
err = -ENOMEM;
goto out_unlock;
}
inst->queue_num = queue_num;
inst->peer_portid = portid;
inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
inst->copy_range = NFQNL_MAX_COPY_RANGE;
inst->copy_mode = NFQNL_COPY_NONE;
spin_lock_init(&inst->lock);
INIT_LIST_HEAD(&inst->queue_list);
if (!try_module_get(THIS_MODULE)) {
err = -EAGAIN;
goto out_free;
}
h = instance_hashfn(queue_num);
hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
spin_unlock(&q->instances_lock);
return inst;
out_free:
kfree(inst);
out_unlock:
spin_unlock(&q->instances_lock);
return ERR_PTR(err);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 135 | 65.22% | 3 | 30.00% |
Patrick McHardy | 49 | 23.67% | 2 | 20.00% |
Gao Feng | 15 | 7.25% | 1 | 10.00% |
Eric W. Biederman | 3 | 1.45% | 1 | 10.00% |
Hideaki Yoshifuji / 吉藤英明 | 3 | 1.45% | 1 | 10.00% |
Richard Weinberger | 1 | 0.48% | 1 | 10.00% |
Florian Westphal | 1 | 0.48% | 1 | 10.00% |
Total | 207 | 100.00% | 10 | 100.00% |
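On failure, instance_create() returns ERR_PTR(-EEXIST), ERR_PTR(-ENOMEM) or ERR_PTR(-EAGAIN) rather than NULL, so callers use the IS_ERR()/PTR_ERR() convention. A simplified sketch of the bind-time caller (the real one is the NFQNL_CFG_CMD_BIND branch of the config handler, outside this excerpt):

queue = instance_create(q, queue_num, NETLINK_CB(skb).portid);
if (IS_ERR(queue))
	return PTR_ERR(queue);	/* -EEXIST, -ENOMEM or -EAGAIN */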
static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
unsigned long data);
static void
instance_destroy_rcu(struct rcu_head *head)
{
struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
rcu);
nfqnl_flush(inst, NULL, 0);
kfree(inst);
module_put(THIS_MODULE);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 27 | 60.00% | 1 | 33.33% |
Patrick McHardy | 18 | 40.00% | 2 | 66.67% |
Total | 45 | 100.00% | 3 | 100.00% |
static void
__instance_destroy(struct nfqnl_instance *inst)
{
hlist_del_rcu(&inst->hlist);
call_rcu(&inst->rcu, instance_destroy_rcu);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 16 | 55.17% | 1 | 50.00% |
Patrick McHardy | 13 | 44.83% | 1 | 50.00% |
Total | 29 | 100.00% | 2 | 100.00% |
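Taken together, __instance_destroy() and instance_destroy_rcu() form the usual RCU deletion pattern: hlist_del_rcu() makes the instance unreachable to new lookups, and call_rcu() defers the flush and kfree() until every reader that may still hold a pointer from instance_lookup() has finished. An illustrative interleaving:

/*   writer                            reader
 *   ------                            ------
 *                                     rcu_read_lock();
 *   hlist_del_rcu(&inst->hlist);      inst = instance_lookup(q, n); // may still succeed
 *   call_rcu(&inst->rcu, ...);        ... uses inst safely ...
 *                                     rcu_read_unlock();
 *   <grace period elapses>
 *   instance_destroy_rcu(): nfqnl_flush() + kfree(inst)
 */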
static void
instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
{
spin_lock(&q->instances_lock);
__instance_destroy(inst);
spin_unlock(&q->instances_lock);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 15 | 40.54% | 1 | 33.33% |
Patrick McHardy | 13 | 35.14% | 1 | 33.33% |
Gao Feng | 9 | 24.32% | 1 | 33.33% |
Total | 37 | 100.00% | 3 | 100.00% |
static inline void
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
list_add_tail(&entry->list, &queue->queue_list);
queue->queue_total++;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 33 | 94.29% | 1 | 33.33% |
Patrick McHardy | 2 | 5.71% | 2 | 66.67% |
Total | 35 | 100.00% | 3 | 100.00% |
static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
list_del(&entry->list);
queue->queue_total--;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Florian Westphal | 28 | 96.55% | 1 | 50.00% |
Harald Welte | 1 | 3.45% | 1 | 50.00% |
Total | 29 | 100.00% | 2 | 100.00% |
static struct nf_queue_entry *
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
{
struct nf_queue_entry *entry = NULL, *i;
spin_lock_bh(&queue->lock);
list_for_each_entry(i, &queue->queue_list, list) {
if (i->id == id) {
entry = i;
break;
}
}
if (entry)
__dequeue_entry(queue, entry);
spin_unlock_bh(&queue->lock);
return entry;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Harald Welte | 40 | 48.78% | 1 | 25.00% |
Patrick McHardy | 38 | 46.34% | 2 | 50.00% |
Florian Westphal | 4 | 4.88% | 1 | 25.00% |
Total | 82 | 100.00% | 4 | 100.00% |
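find_dequeue_entry() is how the verdict path reclaims a packet by the id that userspace echoes back in its NFQNL_MSG_VERDICT message. A simplified sketch of that caller, where vhdr is the parsed struct nfqnl_msg_verdict_hdr (the real nfqnl_recv_verdict(), outside this excerpt, also handles NFQA_MARK, NFQA_CT and payload replacement):

entry = find_dequeue_entry(queue, ntohl(vhdr->id));
if (entry == NULL)
	return -ENOENT;	/* unknown id: already verdicted or never queued here */
nf_reinject(entry, ntohl(vhdr->verdict) & NF_VERDICT_MASK);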
static void
nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
{
struct nf_queue_entry *entry, *next;
spin_lock_bh(&queue->lock);
list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
if (!cmpfn || cmpfn(entry, data)) {
list_del(&entry->list);
queue->queue_total--;
nf_reinject(entry, NF_DROP);
}
}
spin_unlock_bh(&queue->lock);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Patrick McHardy | 55 | 62.50% | 3 | 75.00% |
Harald Welte | 33 | 37.50% | 1 | 25.00% |
Total | 88 | 100.00% | 4 | 100.00% |
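With cmpfn == NULL, nfqnl_flush() drops everything queued, which is what instance_destroy_rcu() relies on above; with a predicate, only matching entries are dropped. A simplified device-down predicate in the spirit of this file's own dev_cmp() (the real one also matches the bridge physical in/out devices):

static int dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
	if (entry->state.in && entry->state.in->ifindex == ifindex)
		return 1;
	if (entry->state.out && entry->state.out->ifindex == ifindex)
		return 1;
	return 0;
}

/* on NETDEV_DOWN: drop every queued packet referencing the device */
nfqnl_flush(inst, dev_cmp, dev->ifindex);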
static int
nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
bool csum_verify)
{
__u32 flags = 0;
if (packet->ip_summed == CHECKSUM_PARTIAL)
flags = NFQA_SKB_CSUMNOTREADY;
else if (csum_verify)
flags = NFQA_SKB_CSUM_NOTVERIFIED;
if (skb_is_gso(packet))
flags |= NFQA_SKB_GSO;
return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Florian Westphal | 72 | 98.63% | 2 | 66.67% |
Harald Welte | 1 | 1.37% | 1 | 33.33% |
Total | 73 | 100.00% | 3 | 100.00% |
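On the wire, NFQA_SKB_INFO is a big-endian u32 bitmask and is simply absent when no flag applies. A hypothetical userspace consumer using libmnl (the attr[] table is assumed to have been filled by a standard mnl_attr_parse() callback):

if (attr[NFQA_SKB_INFO]) {
	uint32_t skbinfo = ntohl(mnl_attr_get_u32(attr[NFQA_SKB_INFO]));

	if (skbinfo & NFQA_SKB_CSUMNOTREADY)
		; /* checksum not yet computed (CHECKSUM_PARTIAL) */
	if (skbinfo & NFQA_SKB_GSO)
		; /* unsegmented GSO packet, possibly larger than the MTU */
	if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
		; /* kernel did not verify the checksum */
}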
static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
{
const struct cred *cred;
if (!sk_fullsock(sk))
return 0;
read_lock_bh(&sk->sk_callback_lock);
if (sk->sk_socket && sk->sk_socket->file) {
cred = sk->sk_socket->file->f_cred;
if (nla_put_be32(skb, NFQA_UID,
htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
goto nla_put_failure;
if (nla_put_be32(skb, NFQA_GID,
htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
goto nla_put_failure;
}
read_unlock_bh(&sk->sk_callback_lock);
return 0;
nla_put_failure:
read_unlock_bh(&sk->sk_callback_lock);
return -1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Valentina Giusti | 136 | 97.14% | 1 | 50.00% |
Eric Dumazet | 4 | 2.86% | 1 | 50.00% |
Total | 140 | 100.00% | 2 | 100.00% |
static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
{
u32 seclen = 0;
#if IS_ENABLED(CONFIG_NETWORK_SECMARK)
if (!skb || !sk_fullsock(skb->sk))
return 0;
read_lock_bh(&skb->sk->sk_callback_lock);
if (skb->secmark)
security_secid_to_secctx(skb->secmark, secdata, &seclen);
read_unlock_bh(&skb->sk->sk_callback_lock);
#endif
return seclen;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Roman Kubiak | 86 | 100.00% | 1 | 100.00% |
Total | 86 | 100.00% | 1 | 100.00% |
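nfqnl_get_sk_secctx() allocates the context string through security_secid_to_secctx(), and ownership passes to the caller. A minimal view of the pairing, as it appears in nfqnl_build_packet_message() below on both its success and failure paths:

seclen = nfqnl_get_sk_secctx(entskb, &secdata);
/* ... attributes emitted ... */
if (seclen)
	security_release_secctx(secdata, seclen);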
static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
{
struct sk_buff *entskb = entry->skb;
u32 nlalen = 0;
if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
return 0;
if (skb_vlan_tag_present(entskb))
nlalen += nla_total_size(nla_total_size(sizeof(__be16)) +
nla_total_size(sizeof(__be16)));
if (entskb->network_header > entskb->mac_header)
nlalen += nla_total_size((entskb->network_header -
entskb->mac_header));
return nlalen;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stephane Bryant | 100 | 100.00% | 1 | 100.00% |
Total | 100 | 100.00% | 1 | 100.00% |
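The VLAN branch reserves room for one nested attribute carrying two u16 attributes, each padded to 4-byte alignment. The arithmetic, assuming NLA_HDRLEN == 4:

/* nla_total_size(sizeof(__be16)) == NLA_ALIGN(4 + 2) == 8
 * inner payload (NFQA_VLAN_TCI + NFQA_VLAN_PROTO) == 8 + 8 == 16
 * outer nest: nla_total_size(16) == 4 + 16 == 20 bytes reserved
 */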
static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb)
{
struct sk_buff *entskb = entry->skb;
if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
return 0;
if (skb_vlan_tag_present(entskb)) {
struct nlattr *nest;
nest = nla_nest_start(skb, NFQA_VLAN | NLA_F_NESTED);
if (!nest)
goto nla_put_failure;
if (nla_put_be16(skb, NFQA_VLAN_TCI, htons(entskb->vlan_tci)) ||
nla_put_be16(skb, NFQA_VLAN_PROTO, entskb->vlan_proto))
goto nla_put_failure;
nla_nest_end(skb, nest);
}
if (entskb->mac_header < entskb->network_header) {
int len = (int)(entskb->network_header - entskb->mac_header);
if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb)))
goto nla_put_failure;
}
return 0;
nla_put_failure:
return -1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stephane Bryant | 170 | 100.00% | 1 | 100.00% |
Total | 170 | 100.00% | 1 | 100.00% |
static struct sk_buff *
nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
__be32 **packet_id_ptr)
{
size_t size;
size_t data_len = 0, cap_len = 0;
unsigned int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct sk_buff *entskb = entry->skb;
struct net_device *indev;
struct net_device *outdev;
struct nf_conn *ct = NULL;
enum ip_conntrack_info uninitialized_var(ctinfo);
struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;
u32 seclen = 0;
size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+ nla_total_size(sizeof(u_int32_t)) /* skbinfo */
+ nla_total_size(sizeof(u_int32_t)); /* cap_len */
if (entskb->tstamp)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
size += nfqnl_get_bridge_size(entry);
if (entry->state.hook <= NF_INET_FORWARD ||
(entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
csum_verify = !skb_csum_unnecessary(entskb);
else
csum_verify = false;
outdev = entry->state.out;
switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
case NFQNL_COPY_META:
case NFQNL_COPY_NONE:
break;
case NFQNL_COPY_PACKET:
if (!(queue->flags & NFQA_CFG_F_GSO) &&
entskb->ip_summed == CHECKSUM_PARTIAL &&
skb_checksum_help(entskb))
return NULL;
data_len = ACCESS_ONCE(queue->copy_range);
if (data_len > entskb->len)
data_len = entskb->len;
hlen = skb_zerocopy_headlen(entskb);
hlen = min_t(unsigned int, hlen, data_len);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
break;
}
nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (queue->flags & NFQA_CFG_F_CONNTRACK) {
if (nfnl_ct != NULL) {
ct = nfnl_ct->get_ct(entskb, &ctinfo);
if (ct != NULL)
size += nfnl_ct->build_size(ct);
}
}
if (queue->flags & NFQA_CFG_F_UID_GID) {
size += (nla_total_size(sizeof(u_int32_t)) /* uid */
+ nla_total_size(sizeof(u_int32_t))); /* gid */
}
if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
seclen = nfqnl_get_sk_secctx(entskb, &secdata);
if (seclen)
size += nla_total_size(seclen);
}
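/* From here on, "size" upper-bounds every attribute emitted below, so the
 * nla_put_*() calls after a successful alloc_skb() are not expected to run
 * out of tailroom; the payload path still re-checks before skb_put(). */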
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
goto nlmsg_failure;
}
nlh = nlmsg_put(skb, 0, 0,
NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
sizeof(struct nfgenmsg), 0);
if (!nlh) {
skb_tx_error(entskb);
kfree_skb(skb);
goto nlmsg_failure;
}
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = entry->state.pf;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(queue->queue_num);
nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
pmsg = nla_data(nla);
pmsg->hw_protocol = entskb->protocol;
pmsg->hook = entry->state.hook;
*packet_id_ptr = &pmsg->packet_id;
indev = entry->state.in;
if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
goto nla_put_failure;
#else
if (entry->state.pf == PF_BRIDGE) {
/* Case 1: indev is the physical input device; we need to
 * look up the bridge group (when called from
 * netfilter_bridge) */
if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
htonl(indev->ifindex)) ||
/* this is the bridge group "brX" */
/* rcu_read_lock()ed by __nf_queue */
nla_put_be32(skb, NFQA_IFINDEX_INDEV,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
int physinif;
/* Case 2: indev is the bridge group; we need to look up
 * the physical device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
physinif = nf_bridge_get_physinif(entskb);
if (physinif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
htonl(physinif)))
goto nla_put_failure;
}
#endif
}
if (outdev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
goto nla_put_failure;
#else
if (entry->state.pf == PF_BRIDGE) {
/* Case 1: outdev is the physical output device; we need
 * to look up the bridge group (when called from
 * netfilter_bridge) */
if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
htonl(outdev->ifindex)) ||
/* this is the bridge group "brX" */
/* rcu_read_lock()ed by __nf_queue */
nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
int physoutif;
/* Case 2: outdev is the bridge group; we need to look up
 * the physical output device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
physoutif = nf_bridge_get_physoutif(entskb);
if (physoutif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
htonl(physoutif)))
goto nla_put_failure;
}
#endif
}
if (entskb->mark &&
nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark)))
goto nla_put_failure;
if (indev && entskb->dev &&
entskb->mac_header != entskb->network_header) {
struct nfqnl_msg_packet_hw phw;
int len;
memset(&phw, 0, sizeof(phw));
len = dev_parse_header(entskb, phw.hw_addr);
if (len) {
phw.hw_addrlen = htons(len);
if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
goto nla_put_failure;
}
}
if (nfqnl_put_bridge(entry, skb) < 0)
goto nla_put_failure;
if (entskb->tstamp) {
struct nfqnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
}
if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
goto nla_put_failure;
if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
goto nla_put_failure;
if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
goto nla_put_failure;
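/* NFQA_CAP_LEN is emitted only when the payload was truncated (the original
 * length, cap_len, exceeds the copied data_len); userspace tests for this
 * attribute to detect truncation, per the comment at the top of the file. */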
if (cap_len > data_len &&
nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
goto nla_put_failure;
if (nfqnl_put_packet_info(skb, entskb, csum_verify))
goto nla_put_failure;
if (data_len) {
struct nlattr *nla;
if (skb_tailroom(skb) < sizeof(*nla) + hlen)
goto nla_put_failure;
nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
nla->nla_type = NFQA_PAYLOAD;
nla->nla_len = nla_attr_size(data_len);
if (skb_zerocopy(skb, entskb, data_len, hlen))
goto nla_put_failure;
}
nlh->nlmsg_len = skb->len;
if (seclen)
security_release_secctx(secdata, seclen);
return skb;
nla_put_failure:
skb_tx_error(entskb);
kfree_skb(skb);
net_err_ratelimited("nf_queue: error creating packet message\n");
nlmsg_failure:
if (seclen)
security_release_secctx(secdata, seclen);