cregit-Linux: how code gets into the kernel

Release 4.11 net/ipv4/tcp_cong.c

Directory: net/ipv4
/*
 * Pluggable TCP congestion control support and newReno
 * congestion control.
 * Based on ideas from I/O scheduler support and Web100.
 *
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
 */


#define pr_fmt(fmt) "TCP: " fmt

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>

static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);

/* Simple linear search, don't expect many entries! */
static struct tcp_congestion_ops *tcp_ca_find(const char *name)
{
        struct tcp_congestion_ops *e;

        list_for_each_entry_rcu(e, &tcp_cong_list, list) {
                if (strcmp(e->name, name) == 0)
                        return e;
        }

        return NULL;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           47      100.00%  2        100.00%
Total                       47      100.00%  2        100.00%

/* Must be called with rcu lock held */
static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
{
        const struct tcp_congestion_ops *ca = tcp_ca_find(name);

#ifdef CONFIG_MODULES
        if (!ca && capable(CAP_NET_ADMIN)) {
                rcu_read_unlock();
                request_module("tcp_%s", name);
                rcu_read_lock();
                ca = tcp_ca_find(name);
        }
#endif
        return ca;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Daniel Borkmann             65      100.00%  1        100.00%
Total                       65      100.00%  1        100.00%

/* Simple linear search, not much in here. */
struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
{
        struct tcp_congestion_ops *e;

        list_for_each_entry_rcu(e, &tcp_cong_list, list) {
                if (e->key == key)
                        return e;
        }

        return NULL;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Daniel Borkmann             39      100.00%  1        100.00%
Total                       39      100.00%  1        100.00%

/*
 * Attach new congestion control algorithm to the list
 * of available options.
 */
int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
        int ret = 0;

        /* all algorithms must implement these */
        if (!ca->ssthresh || !ca->undo_cwnd ||
            !(ca->cong_avoid || ca->cong_control)) {
                pr_err("%s does not implement required ops\n", ca->name);
                return -EINVAL;
        }

        ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));

        spin_lock(&tcp_cong_list_lock);
        if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) {
                pr_notice("%s already registered or non-unique key\n",
                          ca->name);
                ret = -EEXIST;
        } else {
                list_add_tail_rcu(&ca->list, &tcp_cong_list);
                pr_debug("%s registered\n", ca->name);
        }
        spin_unlock(&tcp_cong_list_lock);

        return ret;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           98      66.22%   3        37.50%
Daniel Borkmann             34      22.97%   1        12.50%
Florian Westphal            6       4.05%    1        12.50%
Yuchung Cheng               6       4.05%    1        12.50%
Joe Perches                 4       2.70%    2        25.00%
Total                       148     100.00%  8        100.00%

EXPORT_SYMBOL_GPL(tcp_register_congestion_control);

/*
 * Remove congestion control algorithm, called from
 * the module's remove function.  Module ref counts are used
 * to ensure that this can't be done till all sockets using
 * that method are closed.
 */
void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
{
        spin_lock(&tcp_cong_list_lock);
        list_del_rcu(&ca->list);
        spin_unlock(&tcp_cong_list_lock);

        /* Wait for outstanding readers to complete before the
         * module gets removed entirely.
         *
         * A try_module_get() should fail by now as our module is
         * in "going" state since no refs are held anymore and
         * module_exit() handler being called.
         */
        synchronize_rcu();
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           30      88.24%   1        50.00%
Daniel Borkmann             4       11.76%   1        50.00%
Total                       34      100.00%  2        100.00%

EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
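
For context, a minimal sketch (not part of this file) of how an out-of-tree module would use this register/unregister pair. The "example" name and the module itself are hypothetical; the mandatory callbacks are satisfied by reusing the exported Reno helpers defined later in this file:

        /* Hypothetical congestion control module: a sketch only. */
        #include <linux/module.h>
        #include <net/tcp.h>

        static struct tcp_congestion_ops tcp_example __read_mostly = {
                .name           = "example",            /* must fit TCP_CA_NAME_MAX */
                .owner          = THIS_MODULE,
                .ssthresh       = tcp_reno_ssthresh,    /* required */
                .undo_cwnd      = tcp_reno_undo_cwnd,   /* required */
                .cong_avoid     = tcp_reno_cong_avoid,  /* required unless cong_control is set */
        };

        static int __init tcp_example_init(void)
        {
                return tcp_register_congestion_control(&tcp_example);
        }

        static void __exit tcp_example_exit(void)
        {
                tcp_unregister_congestion_control(&tcp_example);
        }

        module_init(tcp_example_init);
        module_exit(tcp_example_exit);
        MODULE_LICENSE("GPL");

Building this as tcp_example.ko would match the request_module("tcp_%s", name) convention used by __tcp_ca_find_autoload(), so asking for "example" on a socket could pull the module in automatically.
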
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
{
        const struct tcp_congestion_ops *ca;
        u32 key = TCP_CA_UNSPEC;

        might_sleep();

        rcu_read_lock();
        ca = __tcp_ca_find_autoload(name);
        if (ca) {
                key = ca->key;
                *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
        }
        rcu_read_unlock();

        return key;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Daniel Borkmann             65      100.00%  2        100.00%
Total                       65      100.00%  2        100.00%

EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name);
char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
        const struct tcp_congestion_ops *ca;
        char *ret = NULL;

        rcu_read_lock();
        ca = tcp_ca_find_key(key);
        if (ca)
                ret = strncpy(buffer, ca->name, TCP_CA_NAME_MAX);
        rcu_read_unlock();

        return ret;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Daniel Borkmann             58      100.00%  1        100.00%
Total                       58      100.00%  1        100.00%

EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);

/* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_congestion_ops *ca;

        rcu_read_lock();
        list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
                if (likely(try_module_get(ca->owner))) {
                        icsk->icsk_ca_ops = ca;
                        goto out;
                }
                /* Fallback to next available. The last really
                 * guaranteed fallback is Reno from this list.
                 */
        }
out:
        rcu_read_unlock();
        memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));

        if (ca->flags & TCP_CONG_NEEDS_ECN)
                INET_ECN_xmit(sk);
        else
                INET_ECN_dontxmit(sk);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           43      42.16%   2        40.00%
Florian Westphal            26      25.49%   1        20.00%
Eric Dumazet                19      18.63%   1        20.00%
Arnaldo Carvalho de Melo    14      13.73%   1        20.00%
Total                       102     100.00%  5        100.00%


void tcp_init_congestion_control(struct sock *sk)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_ca_ops->init)
                icsk->icsk_ca_ops->init(sk);
        if (tcp_ca_needs_ecn(sk))
                INET_ECN_xmit(sk);
        else
                INET_ECN_dontxmit(sk);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Florian Westphal            20      35.71%   1        25.00%
Eric Dumazet                18      32.14%   1        25.00%
Stephen Hemminger           13      23.21%   1        25.00%
Arnaldo Carvalho de Melo    5       8.93%    1        25.00%
Total                       56      100.00%  4        100.00%


static void tcp_reinit_congestion_control(struct sock *sk,
                                          const struct tcp_congestion_ops *ca)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        tcp_cleanup_congestion_control(sk);
        icsk->icsk_ca_ops = ca;
        icsk->icsk_ca_setsockopt = 1;
        memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));

        if (sk->sk_state != TCP_CLOSE)
                tcp_init_congestion_control(sk);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Daniel Borkmann             42      57.53%   1        20.00%
Florian Westphal            16      21.92%   1        20.00%
Wei Wang                    8       10.96%   1        20.00%
Neal Cardwell               6       8.22%    1        20.00%
Eric Dumazet                1       1.37%    1        20.00%
Total                       73      100.00%  5        100.00%

/* Manage refcounts on socket close. */
void tcp_cleanup_congestion_control(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_ca_ops->release)
                icsk->icsk_ca_ops->release(sk);
        module_put(icsk->icsk_ca_ops->owner);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           27      58.70%   1        50.00%
Arnaldo Carvalho de Melo    19      41.30%   1        50.00%
Total                       46      100.00%  2        100.00%

/* Used by sysctl to change default congestion control */
int tcp_set_default_congestion_control(const char *name)
{
        struct tcp_congestion_ops *ca;
        int ret = -ENOENT;

        spin_lock(&tcp_cong_list_lock);
        ca = tcp_ca_find(name);
#ifdef CONFIG_MODULES
        if (!ca && capable(CAP_NET_ADMIN)) {
                spin_unlock(&tcp_cong_list_lock);
                request_module("tcp_%s", name);
                spin_lock(&tcp_cong_list_lock);
                ca = tcp_ca_find(name);
        }
#endif
        if (ca) {
                ca->flags |= TCP_CONG_NON_RESTRICTED;   /* default is always allowed */
                list_move(&ca->list, &tcp_cong_list);
                ret = 0;
        }
        spin_unlock(&tcp_cong_list_lock);

        return ret;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           112     98.25%   4        66.67%
Eric Paris                  1       0.88%    1        16.67%
Johannes Berg               1       0.88%    1        16.67%
Total                       114     100.00%  6        100.00%

/* Set default value from kernel configuration at bootup */
static int __init tcp_congestion_default(void)
{
        return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           15      100.00%  1        100.00%
Total                       15      100.00%  1        100.00%

late_initcall(tcp_congestion_default);

/* Build string with list of available congestion control values */
void tcp_get_available_congestion_control(char *buf, size_t maxlen)
{
        struct tcp_congestion_ops *ca;
        size_t offs = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
                offs += snprintf(buf + offs, maxlen - offs,
                                 "%s%s",
                                 offs == 0 ? "" : " ", ca->name);
        }
        rcu_read_unlock();
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           65      100.00%  1        100.00%
Total                       65      100.00%  1        100.00%

/* Get current default congestion control */
void tcp_get_default_congestion_control(char *name)
{
        struct tcp_congestion_ops *ca;

        /* We will always have reno... */
        BUG_ON(list_empty(&tcp_cong_list));

        rcu_read_lock();
        ca = list_entry(tcp_cong_list.next,
                        struct tcp_congestion_ops, list);
        strncpy(name, ca->name, TCP_CA_NAME_MAX);
        rcu_read_unlock();
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           55      100.00%  1        100.00%
Total                       55      100.00%  1        100.00%

/* Build list of non-restricted congestion control values */
void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
{
        struct tcp_congestion_ops *ca;
        size_t offs = 0;

        *buf = '\0';
        rcu_read_lock();
        list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
                if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
                        continue;
                offs += snprintf(buf + offs, maxlen - offs,
                                 "%s%s",
                                 offs == 0 ? "" : " ", ca->name);
        }
        rcu_read_unlock();
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           82      100.00%  2        100.00%
Total                       82      100.00%  2        100.00%

/* Change list of non-restricted congestion control */
int tcp_set_allowed_congestion_control(char *val)
{
        struct tcp_congestion_ops *ca;
        char *saved_clone, *clone, *name;
        int ret = 0;

        saved_clone = clone = kstrdup(val, GFP_USER);
        if (!clone)
                return -ENOMEM;

        spin_lock(&tcp_cong_list_lock);
        /* pass 1 check for bad entries */
        while ((name = strsep(&clone, " ")) && *name) {
                ca = tcp_ca_find(name);
                if (!ca) {
                        ret = -ENOENT;
                        goto out;
                }
        }

        /* pass 2 clear old values */
        list_for_each_entry_rcu(ca, &tcp_cong_list, list)
                ca->flags &= ~TCP_CONG_NON_RESTRICTED;

        /* pass 3 mark as allowed */
        while ((name = strsep(&val, " ")) && *name) {
                ca = tcp_ca_find(name);
                WARN_ON(!ca);
                if (ca)
                        ca->flags |= TCP_CONG_NON_RESTRICTED;
        }
out:
        spin_unlock(&tcp_cong_list_lock);
        kfree(saved_clone);

        return ret;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           162     94.19%   2        66.67%
Julia Lawall                10      5.81%    1        33.33%
Total                       172     100.00%  3        100.00%
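
The default, available, and allowed lists above are all exposed through the standard ipv4 sysctls. A user-space sketch of how they are typically exercised (the proc paths are the stock interface; error handling is trimmed for brevity):

        /* Sketch: read the available algorithms, then change the default. */
        #include <stdio.h>

        int main(void)
        {
                char buf[256];
                FILE *f;

                /* backed by tcp_get_available_congestion_control() */
                f = fopen("/proc/sys/net/ipv4/tcp_available_congestion_control", "r");
                if (f) {
                        if (fgets(buf, sizeof(buf), f))
                                printf("available: %s", buf);
                        fclose(f);
                }

                /* backed by tcp_set_default_congestion_control(); needs root */
                f = fopen("/proc/sys/net/ipv4/tcp_congestion_control", "w");
                if (f) {
                        fputs("reno\n", f);
                        fclose(f);
                }
                return 0;
        }
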

/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_congestion_ops *ca;
        int err = 0;

        if (icsk->icsk_ca_dst_locked)
                return -EPERM;

        rcu_read_lock();
        ca = __tcp_ca_find_autoload(name);
        /* No change asking for existing value */
        if (ca == icsk->icsk_ca_ops) {
                icsk->icsk_ca_setsockopt = 1;
                goto out;
        }
        if (!ca)
                err = -ENOENT;
        else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
                   ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
                err = -EPERM;
        else if (!try_module_get(ca->owner))
                err = -EBUSY;
        else
                tcp_reinit_congestion_control(sk, ca);
out:
        rcu_read_unlock();
        return err;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           103     69.13%   3        37.50%
Arnaldo Carvalho de Melo    15      10.07%   1        12.50%
Daniel Borkmann             15      10.07%   2        25.00%
Eric W. Biedermann          8       5.37%    1        12.50%
Neal Cardwell               8       5.37%    1        12.50%
Total                       149     100.00%  8        100.00%
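
The usual user-space entry point into tcp_set_congestion_control() is the TCP_CONGESTION socket option; a minimal sketch:

        /* Sketch: select the algorithm on one socket.  Restricted
         * algorithms need CAP_NET_ADMIN; unknown names yield ENOENT.
         */
        #include <string.h>
        #include <netinet/in.h>
        #include <netinet/tcp.h>
        #include <sys/socket.h>

        static int set_cc(int fd, const char *name)
        {
                return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
                                  name, strlen(name));
        }

The same option with getsockopt() reads the current algorithm name back.
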

/* Slow start is used when congestion window is no greater than the slow start
 * threshold. We base on RFC2581 and also handle stretch ACKs properly.
 * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
 * something better;) a packet is only considered (s)acked in its entirety to
 * defend the ACK attacks described in the RFC. Slow start processes a stretch
 * ACK of degree N as if N acks of degree 1 are received back to back except
 * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
 * returns the leftover acks to adjust cwnd in congestion avoidance mode.
 */
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
        u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);

        acked -= cwnd - tp->snd_cwnd;
        tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);

        return acked;
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           15      28.30%   1        14.29%
Yuchung Cheng               13      24.53%   2        28.57%
Neal Cardwell               12      22.64%   1        14.29%
Eric Dumazet                11      20.75%   2        28.57%
John Heffner                2       3.77%    1        14.29%
Total                       53      100.00%  7        100.00%

EXPORT_SYMBOL_GPL(tcp_slow_start);
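
A worked example of the stretch-ACK arithmetic in tcp_slow_start() above, with illustrative values:

        /* Worked example for tcp_slow_start():
         *   tp->snd_cwnd = 10, tp->snd_ssthresh = 12, acked = 5
         *   cwnd  = min(10 + 5, 12) = 12      growth capped at ssthresh
         *   acked = 5 - (12 - 10)   = 3       leftover acks past the cap
         *   tp->snd_cwnd = min(12, tp->snd_cwnd_clamp)
         *   returns 3, which the caller feeds into congestion avoidance
         */
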
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
 * for every packet that was ACKed.
 */
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
{
        /* If credits accumulated at a higher w, apply them gently now. */
        if (tp->snd_cwnd_cnt >= w) {
                tp->snd_cwnd_cnt = 0;
                tp->snd_cwnd++;
        }

        tp->snd_cwnd_cnt += acked;
        if (tp->snd_cwnd_cnt >= w) {
                u32 delta = tp->snd_cwnd_cnt / w;

                tp->snd_cwnd_cnt -= delta * w;
                tp->snd_cwnd += delta;
        }
        tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Neal Cardwell               56      60.87%   3        75.00%
Ilpo Järvinen               36      39.13%   1        25.00%
Total                       92      100.00%  4        100.00%

EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
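
Likewise, a worked example of the credit accounting in tcp_cong_avoid_ai() above, with illustrative values:

        /* Worked example for tcp_cong_avoid_ai():
         *   w = tp->snd_cwnd = 10, tp->snd_cwnd_cnt = 8, acked = 5
         *   entry check: 8 < 10, so no earlier credits to apply
         *   snd_cwnd_cnt = 8 + 5 = 13
         *   13 >= 10: delta = 13 / 10 = 1
         *   snd_cwnd_cnt = 13 - 10 = 3, snd_cwnd = 10 + 1 = 11
         *   net effect: cwnd grows by one segment per window's worth of acks
         */
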
/*
 * TCP Reno congestion control
 * This is special case used for fallback as well.
 */
/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (!tcp_is_cwnd_limited(sk))
                return;

        /* In "safe" area, increase. */
        if (tcp_in_slow_start(tp)) {
                acked = tcp_slow_start(tp, acked);
                if (!acked)
                        return;
        }
        /* In dangerous area, increase slowly. */
        tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           38      52.78%   4        40.00%
Neal Cardwell               12      16.67%   2        20.00%
Arnaldo Carvalho de Melo    12      16.67%   1        10.00%
Yuchung Cheng               6       8.33%    2        20.00%
Ilpo Järvinen               4       5.56%    1        10.00%
Total                       72      100.00%  10       100.00%

EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);

/* Slow start threshold is half the congestion window (min 2) */
u32 tcp_reno_ssthresh(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);

        return max(tp->snd_cwnd >> 1U, 2U);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           20      60.61%   1        50.00%
Arnaldo Carvalho de Melo    13      39.39%   1        50.00%
Total                       33      100.00%  2        100.00%

EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
u32 tcp_reno_undo_cwnd(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);

        return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
}

Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Florian Westphal            35      100.00%  1        100.00%
Total                       35      100.00%  1        100.00%

EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);

struct tcp_congestion_ops tcp_reno = {
        .flags          = TCP_CONG_NON_RESTRICTED,
        .name           = "reno",
        .owner          = THIS_MODULE,
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
        .undo_cwnd      = tcp_reno_undo_cwnd,
};

Overall Contributors

Person                      Tokens  Prop     Commits  Commit Prop
Stephen Hemminger           1027    56.55%   14       31.11%
Daniel Borkmann             337     18.56%   3        6.67%
Florian Westphal            113     6.22%    3        6.67%
Neal Cardwell               95      5.23%    5        11.11%
Arnaldo Carvalho de Melo    78      4.30%    1        2.22%
Eric Dumazet                49      2.70%    3        6.67%
Ilpo Järvinen               45      2.48%    1        2.22%
Yuchung Cheng               26      1.43%    4        8.89%
Joe Perches                 11      0.61%    2        4.44%
Julia Lawall                10      0.55%    1        2.22%
Wei Wang                    8       0.44%    1        2.22%
Eric W. Biedermann          8       0.44%    1        2.22%
Tejun Heo                   3       0.17%    1        2.22%
John Heffner                2       0.11%    1        2.22%
Fabian Frederick            1       0.06%    1        2.22%
Robert P. J. Day            1       0.06%    1        2.22%
Eric Paris                  1       0.06%    1        2.22%
Johannes Berg               1       0.06%    1        2.22%
Total                       1816    100.00%  45       100.00%
Created with cregit.