cregit-Linux how code gets into the kernel

Release 4.8 net/ipv4/tcp_cong.c

Directory: net/ipv4
/*
 * Pluggable TCP congestion control support and newReno
 * congestion control.
 * Based on ideas from I/O scheduler support and Web100.
 *
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
 */


#define pr_fmt(fmt) "TCP: " fmt

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>

static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);

/*
 * Look up a registered congestion control algorithm by name.
 *
 * Walks tcp_cong_list front to back and compares each entry's name with
 * @name using strcmp().  The list is expected to stay short, so a linear
 * scan is sufficient.  Callers in this file invoke it either inside an
 * rcu_read_lock() section or with tcp_cong_list_lock held.
 *
 * Returns the first matching entry, or NULL when nothing matches.
 */
static struct tcp_congestion_ops *tcp_ca_find(const char *name)
{
	struct tcp_congestion_ops *pos;
	struct tcp_congestion_ops *found = NULL;

	list_for_each_entry_rcu(pos, &tcp_cong_list, list) {
		if (strcmp(pos->name, name) != 0)
			continue;

		found = pos;
		break;
	}

	return found;
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger47100.00%2100.00%
Total47100.00%2100.00%

/*
 * Find an algorithm by name, trying to load its module on a miss.
 *
 * Must be called with the rcu read lock held.  When CONFIG_MODULES is set,
 * the first lookup failed and the caller has CAP_NET_ADMIN, the rcu read
 * lock is dropped around request_module("tcp_<name>") and re-taken before
 * the lookup is repeated.
 *
 * Returns the matching entry or NULL.
 */
static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
{
	const struct tcp_congestion_ops *ca = tcp_ca_find(name);

#ifdef CONFIG_MODULES
	/* Already present, or caller may not trigger a module load. */
	if (ca || !capable(CAP_NET_ADMIN))
		return ca;

	/* The rcu read lock is released for the duration of the load. */
	rcu_read_unlock();
	request_module("tcp_%s", name);
	rcu_read_lock();

	ca = tcp_ca_find(name);
#endif
	return ca;
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann65100.00%1100.00%
Total65100.00%1100.00%

/*
 * Look up a registered congestion control algorithm by its key.
 *
 * Linear scan of tcp_cong_list; the list is not expected to hold many
 * entries.
 *
 * Returns the first entry whose ->key equals @key, or NULL if none does.
 */
struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
{
	struct tcp_congestion_ops *pos;
	struct tcp_congestion_ops *found = NULL;

	list_for_each_entry_rcu(pos, &tcp_cong_list, list) {
		if (pos->key != key)
			continue;

		found = pos;
		break;
	}

	return found;
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann39100.00%1100.00%
Total39100.00%1100.00%

/* * Attach new congestion control algorithm to the list * of available options. */
int tcp_register_congestion_control(struct tcp_congestion_ops *ca) { int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ if (!ca->ssthresh || !ca->cong_avoid) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); spin_lock(&tcp_cong_list_lock); if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { pr_notice("%s already registered or non-unique key\n", ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); pr_debug("%s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger9972.26%350.00%
daniel borkmanndaniel borkmann3424.82%116.67%
joe perchesjoe perches42.92%233.33%
Total137100.00%6100.00%

EXPORT_SYMBOL_GPL(tcp_register_congestion_control); /* * Remove congestion control algorithm, called from * the module's remove function. Module ref counts are used * to ensure that this can't be done till all sockets using * that method are closed. */
/*
 * @ca: algorithm to unlink from tcp_cong_list.
 *
 * The entry is removed under tcp_cong_list_lock, then the function blocks
 * in synchronize_rcu() before returning.
 */
void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
{
	spin_lock(&tcp_cong_list_lock);
	list_del_rcu(&ca->list);
	spin_unlock(&tcp_cong_list_lock);

	/* Wait for outstanding readers to complete before the
	 * module gets removed entirely.
	 *
	 * A try_module_get() should fail by now as our module is
	 * in "going" state since no refs are held anymore and
	 * module_exit() handler being called.
	 */
	synchronize_rcu();
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger3088.24%150.00%
daniel borkmanndaniel borkmann411.76%150.00%
Total34100.00%2100.00%

EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
/*
 * Translate an algorithm name into its key.
 *
 * @name:   algorithm name to look up (a module load may be attempted)
 * @ecn_ca: set to whether the algorithm has TCP_CONG_NEEDS_ECN in its
 *          flags; written only when the algorithm was found
 *
 * May sleep.  Returns the algorithm's key, or TCP_CA_UNSPEC when no
 * algorithm of that name could be found.
 */
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
{
	const struct tcp_congestion_ops *ops;
	u32 key;

	might_sleep();

	rcu_read_lock();
	ops = __tcp_ca_find_autoload(name);
	if (!ops) {
		key = TCP_CA_UNSPEC;
	} else {
		key = ops->key;
		*ecn_ca = ops->flags & TCP_CONG_NEEDS_ECN;
	}
	rcu_read_unlock();

	return key;
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann65100.00%2100.00%
Total65100.00%2100.00%

EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name);
/*
 * Translate an algorithm key back into its name.
 *
 * @key:    key to look up on tcp_cong_list
 * @buffer: destination; up to TCP_CA_NAME_MAX bytes are written with
 *          strncpy() when the key is found, otherwise it is left untouched
 *
 * Returns @buffer when the key was found, NULL otherwise.
 */
char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
	const struct tcp_congestion_ops *ops;
	char *name;

	rcu_read_lock();
	ops = tcp_ca_find_key(key);
	name = ops ? strncpy(buffer, ops->name, TCP_CA_NAME_MAX) : NULL;
	rcu_read_unlock();

	return name;
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann58100.00%1100.00%
Total58100.00%1100.00%

EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); /* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (likely(try_module_get(ca->owner))) { icsk->icsk_ca_ops = ca; goto out; } /* Fallback to next available. The last really * guaranteed fallback is Reno from this list. */ } out: rcu_read_unlock(); /* Clear out private data before diag gets it and * the ca has not been initialized. */ if (ca->get_info) memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) INET_ECN_xmit(sk); else INET_ECN_dontxmit(sk); }

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger4339.45%240.00%
florian westphalflorian westphal3330.28%120.00%
eric dumazeteric dumazet1917.43%120.00%
arnaldo carvalho de meloarnaldo carvalho de melo1412.84%120.00%
Total109100.00%5100.00%


void tcp_init_congestion_control(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) INET_ECN_xmit(sk); else INET_ECN_dontxmit(sk); }

Contributors

PersonTokensPropCommitsCommitProp
florian westphalflorian westphal2035.71%125.00%
eric dumazeteric dumazet1832.14%125.00%
stephen hemmingerstephen hemminger1323.21%125.00%
arnaldo carvalho de meloarnaldo carvalho de melo58.93%125.00%
Total56100.00%4100.00%


/*
 * Replace the socket's congestion control with @ca.
 *
 * The current algorithm is cleaned up first, then @ca is installed and the
 * socket is flagged as having had its algorithm set via setsockopt.  The
 * new algorithm is initialised right away unless the socket is in
 * TCP_CLOSE state.
 */
static void tcp_reinit_congestion_control(struct sock *sk,
					  const struct tcp_congestion_ops *ca)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_cleanup_congestion_control(sk);

	icsk->icsk_ca_ops = ca;
	icsk->icsk_ca_setsockopt = 1;

	if (sk->sk_state == TCP_CLOSE)
		return;

	tcp_init_congestion_control(sk);
}

Contributors

PersonTokensPropCommitsCommitProp
daniel borkmanndaniel borkmann5087.72%133.33%
neal cardwellneal cardwell610.53%133.33%
eric dumazeteric dumazet11.75%133.33%
Total57100.00%3100.00%

/*
 * Manage refcounts on socket close.
 *
 * Calls the current algorithm's release hook when it has one, then drops
 * the reference on the module that owns the algorithm.
 */
void tcp_cleanup_congestion_control(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->release)
		icsk->icsk_ca_ops->release(sk);
	/* icsk_ca_ops is read again here, after the release hook has run. */
	module_put(icsk->icsk_ca_ops->owner);
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger2758.70%150.00%
arnaldo carvalho de meloarnaldo carvalho de melo1941.30%150.00%
Total46100.00%2100.00%

/* Used by sysctl to change default congestion control */
int tcp_set_default_congestion_control(const char *name) { struct tcp_congestion_ops *ca; int ret = -ENOENT; spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); #ifdef CONFIG_MODULES if (!ca && capable(CAP_NET_ADMIN)) { spin_unlock(&tcp_cong_list_lock); request_module("tcp_%s", name); spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); } #endif if (ca) { ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ list_move(&ca->list, &tcp_cong_list); ret = 0; } spin_unlock(&tcp_cong_list_lock); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger11298.25%466.67%
eric pariseric paris10.88%116.67%
johannes bergjohannes berg10.88%116.67%
Total114100.00%6100.00%

/*
 * Set default value from kernel configuration at bootup.
 *
 * Passes CONFIG_DEFAULT_TCP_CONG to tcp_set_default_congestion_control()
 * and returns its result unchanged.
 */
static int __init tcp_congestion_default(void)
{
	return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger15100.00%1100.00%
Total15100.00%1100.00%

late_initcall(tcp_congestion_default); /* Build string with list of available congestion control values */
/*
 * @buf:    destination buffer
 * @maxlen: size of @buf in bytes
 *
 * Writes the name of every entry on tcp_cong_list into @buf, in list order,
 * separated by single spaces.  Output that does not fit is truncated; the
 * walk stops as soon as the buffer is full so that no write is ever
 * attempted beyond @buf + @maxlen.
 */
void tcp_get_available_congestion_control(char *buf, size_t maxlen)
{
	struct tcp_congestion_ops *ca;
	size_t offs = 0;

	/* Leave a valid empty string behind even if nothing gets written. */
	if (maxlen)
		*buf = '\0';

	rcu_read_lock();
	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
		offs += snprintf(buf + offs, maxlen - offs,
				 "%s%s",
				 offs == 0 ? "" : " ", ca->name);

		/* snprintf() reports the length it wanted to write, so offs
		 * may reach or pass maxlen on truncation.  Continuing would
		 * make "maxlen - offs" wrap around and "buf + offs" point
		 * outside the buffer.
		 */
		if (offs >= maxlen)
			break;
	}
	rcu_read_unlock();
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger65100.00%1100.00%
Total65100.00%1100.00%

/*
 * Get the current default congestion control.
 *
 * @name: destination; receives up to TCP_CA_NAME_MAX bytes, copied with
 *        strncpy() from the name of the entry at the head of tcp_cong_list.
 *
 * An empty list is treated as a bug.
 */
void tcp_get_default_congestion_control(char *name)
{
	struct tcp_congestion_ops *head_ca;
	struct list_head *first;

	/* We will always have reno... */
	BUG_ON(list_empty(&tcp_cong_list));

	rcu_read_lock();
	first = tcp_cong_list.next;
	head_ca = list_entry(first, struct tcp_congestion_ops, list);
	strncpy(name, head_ca->name, TCP_CA_NAME_MAX);
	rcu_read_unlock();
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger55100.00%1100.00%
Total55100.00%1100.00%

/*
 * Build list of non-restricted congestion control values.
 *
 * @buf:    destination buffer; always starts out as an empty string
 * @maxlen: size of @buf in bytes
 *
 * Writes the names of all entries on tcp_cong_list that carry
 * TCP_CONG_NON_RESTRICTED into @buf, in list order, separated by single
 * spaces.  Output that does not fit is truncated; the walk stops as soon as
 * the buffer is full so that no write is ever attempted beyond
 * @buf + @maxlen.
 */
void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
{
	struct tcp_congestion_ops *ca;
	size_t offs = 0;

	*buf = '\0';
	rcu_read_lock();
	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
		if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
			continue;

		offs += snprintf(buf + offs, maxlen - offs,
				 "%s%s",
				 offs == 0 ? "" : " ", ca->name);

		/* snprintf() reports the length it wanted to write, so offs
		 * may reach or pass maxlen on truncation.  Continuing would
		 * make "maxlen - offs" wrap around and "buf + offs" point
		 * outside the buffer.
		 */
		if (offs >= maxlen)
			break;
	}
	rcu_read_unlock();
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger82100.00%2100.00%
Total82100.00%2100.00%

/* Change list of non-restricted congestion control */
int tcp_set_allowed_congestion_control(char *val) { struct tcp_congestion_ops *ca; char *saved_clone, *clone, *name; int ret = 0; saved_clone = clone = kstrdup(val, GFP_USER); if (!clone) return -ENOMEM; spin_lock(&tcp_cong_list_lock); /* pass 1 check for bad entries */ while ((name = strsep(&clone, " ")) && *name) { ca = tcp_ca_find(name); if (!ca) { ret = -ENOENT; goto out; } } /* pass 2 clear old values */ list_for_each_entry_rcu(ca, &tcp_cong_list, list) ca->flags &= ~TCP_CONG_NON_RESTRICTED; /* pass 3 mark as allowed */ while ((name = strsep(&val, " ")) && *name) { ca = tcp_ca_find(name); WARN_ON(!ca); if (ca) ca->flags |= TCP_CONG_NON_RESTRICTED; } out: spin_unlock(&tcp_cong_list_lock); kfree(saved_clone); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger16294.19%266.67%
julia lawalljulia lawall105.81%133.33%
Total172100.00%3100.00%

/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; int err = 0; if (icsk->icsk_ca_dst_locked) return -EPERM; rcu_read_lock(); ca = __tcp_ca_find_autoload(name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { icsk->icsk_ca_setsockopt = 1; goto out; } if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) err = -EPERM; else if (!try_module_get(ca->owner)) err = -EBUSY; else tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; }

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger10369.13%337.50%
arnaldo carvalho de meloarnaldo carvalho de melo1510.07%112.50%
daniel borkmanndaniel borkmann1510.07%225.00%
eric w. biedermaneric w. biederman85.37%112.50%
neal cardwellneal cardwell85.37%112.50%
Total149100.00%8100.00%

/* Slow start is used when the congestion window is no greater than the slow
 * start threshold. It is based on RFC2581 and also handles stretch ACKs
 * properly. RFC3465 Appropriate Byte Counting (ABC) is not implemented per
 * se, but something better ;) a packet is only considered (s)acked in its
 * entirety, to defend against the ACK attacks described in the RFC. Slow
 * start processes a stretch ACK of degree N as if N acks of degree 1 were
 * received back to back, except that ABC caps N to 2. Slow start exits when
 * cwnd grows over ssthresh and returns the leftover acks so that cwnd can be
 * adjusted in congestion avoidance mode.
 *
 * @tp:    socket whose snd_cwnd is grown (bounded by snd_ssthresh and then
 *         by snd_cwnd_clamp)
 * @acked: number of newly acked packets
 *
 * Returns the part of @acked that was not consumed reaching snd_ssthresh.
 */
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
	u32 old_cwnd = tp->snd_cwnd;
	u32 target = min(old_cwnd + acked, tp->snd_ssthresh);
	u32 consumed = target - old_cwnd;

	tp->snd_cwnd = min(target, tp->snd_cwnd_clamp);

	return acked - consumed;
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger1528.30%114.29%
yuchung chengyuchung cheng1324.53%228.57%
neal cardwellneal cardwell1222.64%114.29%
eric dumazeteric dumazet1120.75%228.57%
john heffnerjohn heffner23.77%114.29%
Total53100.00%7100.00%

EXPORT_SYMBOL_GPL(tcp_slow_start); /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), * for every packet that was ACKed. */
/*
 * @tp:    socket whose snd_cwnd / snd_cwnd_cnt are updated
 * @w:     number of acked packets required per one-packet cwnd increase
 * @acked: number of newly acked packets
 *
 * Acked packets are accumulated in snd_cwnd_cnt; every full @w of them is
 * converted into one unit of snd_cwnd and the remainder is kept for later
 * calls.  The result is capped at snd_cwnd_clamp.
 */
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
{
	/* If credits accumulated at a higher w, apply them gently now. */
	if (tp->snd_cwnd_cnt >= w) {
		tp->snd_cwnd_cnt = 0;
		tp->snd_cwnd++;
	}

	tp->snd_cwnd_cnt += acked;

	/* Convert whole multiples of w into cwnd, keep the remainder. */
	if (tp->snd_cwnd_cnt >= w) {
		tp->snd_cwnd += tp->snd_cwnd_cnt / w;
		tp->snd_cwnd_cnt %= w;
	}

	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
}

Contributors

PersonTokensPropCommitsCommitProp
neal cardwellneal cardwell5660.87%375.00%
ilpo jarvinenilpo jarvinen3639.13%125.00%
Total92100.00%4100.00%

EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* * TCP Reno congestion control * This is special case used for fallback as well. */ /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */
/*
 * @sk:    socket being updated
 * @ack:   not used by this implementation
 * @acked: number of newly acked packets
 *
 * Does nothing unless the connection is cwnd-limited.  While in slow start
 * the acked packets go through tcp_slow_start() first; whatever it hands
 * back is then applied as additive increase with the current cwnd as the
 * divisor.
 */
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 leftover = acked;

	if (!tcp_is_cwnd_limited(sk))
		return;

	/* In "safe" area, increase. */
	if (tcp_in_slow_start(tp)) {
		leftover = tcp_slow_start(tp, acked);
		if (leftover == 0)
			return;
	}

	/* In dangerous area, increase slowly. */
	tcp_cong_avoid_ai(tp, tp->snd_cwnd, leftover);
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger3852.78%440.00%
neal cardwellneal cardwell1216.67%220.00%
arnaldo carvalho de meloarnaldo carvalho de melo1216.67%110.00%
yuchung chengyuchung cheng68.33%220.00%
ilpo jarvinenilpo jarvinen45.56%110.00%
Total72100.00%10100.00%

EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */
/*
 * Returns half of the socket's current snd_cwnd, but never less than 2.
 */
u32 tcp_reno_ssthresh(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 half_cwnd = tp->snd_cwnd >> 1U;

	return max(half_cwnd, 2U);
}

Contributors

PersonTokensPropCommitsCommitProp
stephen hemmingerstephen hemminger2060.61%150.00%
arnaldo carvalho de meloarnaldo carvalho de melo1339.39%150.00%
Total33100.00%2100.00%

EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);

/*
 * Ops table for the Reno algorithm implemented in this file.  It is marked
 * non-restricted and provides the two callbacks that
 * tcp_register_congestion_control() requires: ssthresh and cong_avoid.
 */
struct tcp_congestion_ops tcp_reno = {
	.name		= "reno",
	.owner		= THIS_MODULE,
	.flags		= TCP_CONG_NON_RESTRICTED,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
};

Overall Contributors

Person | Tokens | Prop | Commits | CommitProp
stephen hemminger | 1028 | 58.71% | 14 | 34.15%
daniel borkmann | 345 | 19.70% | 3 | 7.32%
neal cardwell | 95 | 5.43% | 5 | 12.20%
arnaldo carvalho de melo | 78 | 4.45% | 1 | 2.44%
florian westphal | 53 | 3.03% | 1 | 2.44%
eric dumazet | 49 | 2.80% | 3 | 7.32%
ilpo jarvinen | 45 | 2.57% | 1 | 2.44%
yuchung cheng | 20 | 1.14% | 3 | 7.32%
joe perches | 11 | 0.63% | 2 | 4.88%
julia lawall | 10 | 0.57% | 1 | 2.44%
eric w. biederman | 8 | 0.46% | 1 | 2.44%
tejun heo | 3 | 0.17% | 1 | 2.44%
john heffner | 2 | 0.11% | 1 | 2.44%
fabian frederick | 1 | 0.06% | 1 | 2.44%
robert p. j. day | 1 | 0.06% | 1 | 2.44%
johannes berg | 1 | 0.06% | 1 | 2.44%
eric paris | 1 | 0.06% | 1 | 2.44%
Total | 1751 | 100.00% | 41 | 100.00%
Directory: net/ipv4
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.