cregit-Linux: how code gets into the kernel

Release 4.17: mm/page-writeback.c

/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002    Andrew Morton
 *              Initial version
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */

#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when it falls below this threshold.
 */

#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))
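
A worked expansion of this threshold, assuming the common 4 KiB page size (PAGE_SHIFT == 12):

/*
 * Illustrative arithmetic (not part of the kernel source):
 *
 *	DIRTY_POLL_THRESH = 128 >> (12 - 10) = 128 >> 2 = 32 pages
 *
 * i.e. about 128 KiB worth of pages; with 64 KiB pages (PAGE_SHIFT == 16)
 * the shift yields 128 >> 6 = 2 pages, still roughly 128 KiB.
 */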

/*
 * Estimate write bandwidth at 200ms intervals.
 */

#define BANDWIDTH_INTERVAL	max(HZ/5, 1)


#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */

static long ratelimit_pages = 32;
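
The sketch below illustrates the idea only; it is not the kernel's actual implementation. example_nr_dirtied, example_throttle() and example_account_dirtied_page() are hypothetical stand-ins, and the real per-CPU accounting lives in balance_dirty_pages_ratelimited():

static DEFINE_PER_CPU(int, example_nr_dirtied);	/* hypothetical counter */

static void example_throttle(struct address_space *mapping)
{
	/* hypothetical stand-in for the real balance_dirty_pages() path */
}

static void example_account_dirtied_page(struct address_space *mapping)
{
	/* cheap, common case: just bump the per-CPU count */
	if (this_cpu_inc_return(example_nr_dirtied) < ratelimit_pages)
		return;

	/* threshold crossed: reset the count and run the expensive check */
	this_cpu_write(example_nr_dirtied, 0);
	example_throttle(mapping);
}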

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 */

int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */

unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */

int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */

int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */

unsigned long vm_dirty_bytes;
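
Only one of each ratio/bytes pair is in effect at a time (writing one sysctl zeroes the other, see the handlers below). A worked example of the resulting limit, assuming 4 KiB pages:

/*
 * Illustrative arithmetic (not part of the kernel source):
 *
 *	vm_dirty_bytes = 268435456 (256 MiB)    -> limit = 256 MiB / 4 KiB
 *	                                           = 65536 pages, ratio ignored
 *	vm_dirty_bytes = 0, vm_dirty_ratio = 20 -> limit ~= 20% of dirtyable
 *	                                           memory (computed in units of
 *	                                           1/PAGE_SIZE by
 *	                                           domain_dirty_limits() below)
 */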

/*
 * The interval between `kupdate'-style writebacks
 */

unsigned int dirty_writeback_interval = 5 * 100;	/* centiseconds */
EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty
 */

unsigned int dirty_expire_interval = 30 * 100;	/* centiseconds */

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */

int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
 */

int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */


struct wb_domain global_wb_domain;

/* consolidated parameters for balance_dirty_pages() and its subroutines */

struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 */

#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

#ifdef CONFIG_CGROUP_WRITEBACK


#define GDTC_INIT(__wb)		.wb = (__wb),                           \
                                .dom = &global_wb_domain,               \
                                .wb_completions = &(__wb)->completions


#define GDTC_INIT_NO_WB		.dom = &global_wb_domain


#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),                           \
                                .dom = mem_cgroup_wb_domain(__wb),      \
                                .wb_completions = &(__wb)->memcg_completions, \
                                .gdtc = __gdtc


static bool mdtc_valid(struct dirty_throttle_control *dtc) { return dtc->dom; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                14   87.50%        1      50.00%
Jan Kara                  2   12.50%        1      50.00%
Total                    16  100.00%        2     100.00%


static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc) { return dtc->dom; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                18  100.00%        1     100.00%
Total                    18  100.00%        1     100.00%


static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc) { return mdtc->gdtc; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                18  100.00%        1     100.00%
Total                    18  100.00%        1     100.00%


static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb) { return &wb->memcg_completions; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                19  100.00%        1     100.00%
Total                    19  100.00%        1     100.00%


static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	/*
	 * @wb may already be clean by the time control reaches here and
	 * the total may not include its bw.
	 */
	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			do_div(min, tot_bw);
		}
		if (max < 100) {
			max *= this_bw;
			do_div(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo               115   95.83%        1      50.00%
Jan Kara                  5    4.17%        1      50.00%
Total                   120  100.00%        2     100.00%
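
A worked example of the scaling above, with illustrative numbers:

/*
 * Illustrative numbers (not from the source): bdi->min_ratio = 10,
 * bdi->max_ratio = 60, this wb's avg_write_bandwidth = 25 units and the
 * bdi total = 100 units:
 *
 *	min = 10 * 25 / 100 = 2   (do_div truncates 2.5)
 *	max = 60 * 25 / 100 = 15
 *
 * so a wb doing a quarter of the bdi's writeback gets a quarter of the
 * bdi-wide min/max shares.  If this_bw >= tot_bw the ratios are passed
 * through unscaled.
 */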

#else	/* CONFIG_CGROUP_WRITEBACK */

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)

static bool mdtc_valid(struct dirty_throttle_control *dtc) { return false; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                13   92.86%        1      50.00%
Jan Kara                  1    7.14%        1      50.00%
Total                    14  100.00%        2     100.00%


static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc) { return &global_wb_domain; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                17  100.00%        1     100.00%
Total                    17  100.00%        1     100.00%


static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc) { return NULL; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                16  100.00%        1     100.00%
Total                    16  100.00%        1     100.00%


static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb) { return NULL; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                16  100.00%        1     100.00%
Total                    16  100.00%        1     100.00%


static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	*minp = wb->bdi->min_ratio;
	*maxp = wb->bdi->max_ratio;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                37   94.87%        1      50.00%
Jan Kara                  2    5.13%        1      50.00%
Total                    39  100.00%        2     100.00%

#endif	/* CONFIG_CGROUP_WRITEBACK */

/*
 * In a memory zone, there is a certain amount of pages we consider
 * available for the page cache, which is essentially the number of
 * free and reclaimable pages, minus some zone reserves to protect
 * lowmem and the ability to uphold the zone's watermarks without
 * requiring writeback.
 *
 * This number of dirtyable pages is the base value of which the
 * user-configurable dirty ratio is the effective number of pages that
 * are allowed to be actually dirtied.  Per individual zone, or
 * globally by using the sum of dirtyable pages over all zones.
 *
 * Because the user is allowed to specify the dirty limit globally as
 * absolute number of bytes, calculating the per-zone dirty limit can
 * require translating the configured limit into a percentage of
 * global dirtyable memory first.
 */

/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Returns the node's number of pages potentially available for dirty
 * page cache. This is the base value for the per-node dirty limits.
 */
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
	unsigned long nr_pages = 0;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
	}

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

	return nr_pages;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Mel Gorman               49   50.00%        2      40.00%
Johannes Weiner          49   50.00%        3      60.00%
Total                    98  100.00%        5     100.00%


static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;
	int i;

	for_each_node_state(node, N_HIGH_MEMORY) {
		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
			struct zone *z;
			unsigned long nr_pages;

			if (!is_highmem_idx(i))
				continue;

			z = &NODE_DATA(node)->node_zones[i];
			if (!populated_zone(z))
				continue;

			nr_pages = zone_page_state(z, NR_FREE_PAGES);
			/* watch for underflows */
			nr_pages -= min(nr_pages, high_wmark_pages(z));
			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
			x += nr_pages;
		}
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * without swap) can bring down the dirtyable pages below
	 * the zone's dirty balance reserve and the above calculation
	 * will underflow.  However we still want to add in nodes
	 * which are below threshold (negative values) to get a more
	 * accurate calculation but make sure that the total never
	 * underflows.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Johannes Weiner          58   35.15%        1      16.67%
Mel Gorman               52   31.52%        2      33.33%
MinChan Kim              21   12.73%        1      16.67%
JoonSoo Kim              20   12.12%        1      16.67%
Sonny Rao                14    8.48%        1      16.67%
Total                   165  100.00%        6     100.00%

/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Returns the global number of pages potentially available for dirty
 * page cache. This is the base value for the global dirty limits.
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_zone_page_state(NR_FREE_PAGES);
	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	x -= min(x, totalreserve_pages);

	x += global_node_page_state(NR_INACTIVE_FILE);
	x += global_node_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Johannes Weiner          50   80.65%        5      55.56%
Sonny Rao                 8   12.90%        1      11.11%
Mel Gorman                2    3.23%        1      11.11%
Michal Hocko              1    1.61%        1      11.11%
H Hartley Sweeten         1    1.61%        1      11.11%
Total                    62  100.00%        9     100.00%
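
A worked pass through the calculation, with illustrative numbers:

/*
 * Illustrative numbers (not from the source), on a system without
 * highmem: NR_FREE_PAGES = 300000, totalreserve_pages = 20000,
 * NR_INACTIVE_FILE = 150000, NR_ACTIVE_FILE = 50000:
 *
 *	x = 300000 - 20000 + 150000 + 50000 = 480000
 *	return 480000 + 1 = 480001 dirtyable pages
 *
 * With vm_highmem_is_dirtyable == 0 on a highmem machine, the highmem
 * share computed by highmem_dirtyable_memory() would be subtracted first.
 */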

/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculate @dtc->thresh and ->bg_thresh considering
 * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}. The caller
 * must ensure that @dtc->avail is set before calling this function. The
 * dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
 * real-time tasks.
 */
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
	const unsigned long available_memory = dtc->avail;
	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
	unsigned long bytes = vm_dirty_bytes;
	unsigned long bg_bytes = dirty_background_bytes;
	/* convert ratios to per-PAGE_SIZE for higher precision */
	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
	unsigned long thresh;
	unsigned long bg_thresh;
	struct task_struct *tsk;

	/* gdtc is !NULL iff @dtc is for memcg domain */
	if (gdtc) {
		unsigned long global_avail = gdtc->avail;

		/*
		 * The byte settings can't be applied directly to memcg
		 * domains.  Convert them to ratios by scaling against
		 * globally available memory.  As the ratios are in
		 * per-PAGE_SIZE, they can be obtained by dividing bytes by
		 * number of pages.
		 */
		if (bytes)
			ratio = min(DIV_ROUND_UP(bytes, global_avail),
				    PAGE_SIZE);
		if (bg_bytes)
			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
				       PAGE_SIZE);
		bytes = bg_bytes = 0;
	}

	if (bytes)
		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
	else
		thresh = (ratio * available_memory) / PAGE_SIZE;

	if (bg_bytes)
		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
	else
		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;

	if (bg_thresh >= thresh)
		bg_thresh = thresh / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
	}
	dtc->thresh = thresh;
	dtc->bg_thresh = bg_thresh;

	/* we should eventually report the domain in the TP */
	if (!gdtc)
		trace_global_dirty_state(bg_thresh, thresh);
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo               148   55.64%        2      40.00%
Johannes Weiner         103   38.72%        1      20.00%
Neil Brown               12    4.51%        1      20.00%
David Rientjes            3    1.13%        1      20.00%
Total                   266  100.00%        5     100.00%
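
A worked pass through the global branch of the calculation, with illustrative numbers:

/*
 * Worked example for the global domain (gdtc == NULL), illustrative only:
 * available_memory = 1048576 pages (4 GiB of 4 KiB pages),
 * vm_dirty_bytes = dirty_background_bytes = 0, vm_dirty_ratio = 20,
 * dirty_background_ratio = 10:
 *
 *	ratio     = 20 * 4096 / 100 = 819	(per-PAGE_SIZE units)
 *	bg_ratio  = 10 * 4096 / 100 = 409
 *	thresh    = 819 * 1048576 / 4096 = 209664 pages (~819 MiB)
 *	bg_thresh = 409 * 1048576 / 4096 = 104704 pages (~409 MiB)
 *
 * bg_thresh < thresh, so it is not halved; a normal (non-PF_LESS_THROTTLE,
 * non-realtime) task gets no extra boost.
 */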

/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };

	gdtc.avail = global_dirtyable_memory();
	domain_dirty_limits(&gdtc);

	*pbackground = gdtc.bg_thresh;
	*pdirty = gdtc.thresh;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                47   94.00%        1      50.00%
Johannes Weiner           3    6.00%        1      50.00%
Total                    50  100.00%        2     100.00%

/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Returns the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
	unsigned long node_memory = node_dirtyable_memory(pgdat);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * node_memory / 100;

	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Johannes Weiner          73   90.12%        1      50.00%
Mel Gorman                8    9.88%        1      50.00%
Total                    81  100.00%        2     100.00%

/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Returns %true when the dirty pages in @pgdat are within the node's
 * dirty limit, %false if the limit is exceeded.
 */
bool node_dirty_ok(struct pglist_data *pgdat)
{
	unsigned long limit = node_dirty_limit(pgdat);
	unsigned long nr_pages = 0;

	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
	nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
	nr_pages += node_page_state(pgdat, NR_WRITEBACK);

	return nr_pages <= limit;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Johannes Weiner          29   50.88%        1      33.33%
Mel Gorman               28   49.12%        2      66.67%
Total                    57  100.00%        3     100.00%


int dirty_background_ratio_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_bytes = 0; return ret; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
David Rientjes           59  100.00%        1     100.00%
Total                    59  100.00%        1     100.00%


int dirty_background_bytes_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_ratio = 0; return ret; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
David Rientjes           59  100.00%        1     100.00%
Total                    59  100.00%        1     100.00%


int dirty_ratio_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { writeback_set_ratelimit(); vm_dirty_bytes = 0; } return ret; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Peter Zijlstra           57   78.08%        1      33.33%
David Rientjes           15   20.55%        1      33.33%
Jan Kara                  1    1.37%        1      33.33%
Total                    73  100.00%        3     100.00%


int dirty_bytes_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; int ret; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { writeback_set_ratelimit(); vm_dirty_ratio = 0; } return ret; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
David Rientjes           58   78.38%        1      20.00%
Peter Zijlstra           13   17.57%        2      40.00%
Sven Wegener              2    2.70%        1      20.00%
Jan Kara                  1    1.35%        1      20.00%
Total                    74  100.00%        5     100.00%


static unsigned long wp_next_time(unsigned long cur_time) { cur_time += VM_COMPLETIONS_PERIOD_LEN; /* 0 has a special meaning... */ if (!cur_time) return 1; return cur_time; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Jan Kara                 27  100.00%        1     100.00%
Total                    27  100.00%        1     100.00%
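
The special case matters because a period_time of 0 means "timer off" elsewhere in this file (wb_domain_writeout_inc() and writeout_period() test for it). A worked example of the wraparound:

/*
 * Illustrative: with 32-bit jiffies and VM_COMPLETIONS_PERIOD_LEN == 3*HZ,
 * cur_time == 0xffffffff - 3*HZ + 1 gives cur_time + 3*HZ == 0, which
 * would read as "timer off", so wp_next_time() returns 1 instead.
 */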


static void wb_domain_writeout_inc(struct wb_domain *dom,
				   struct fprop_local_percpu *completions,
				   unsigned int max_prop_frac)
{
	__fprop_inc_percpu_max(&dom->completions, completions,
			       max_prop_frac);
	/* First event after period switching was turned off? */
	if (unlikely(!dom->period_time)) {
		/*
		 * We can race with other __bdi_writeout_inc calls here but
		 * it does not cause any harm since the resulting time when
		 * timer will fire and what is in writeout_period_time will be
		 * roughly the same.
		 */
		dom->period_time = wp_next_time(jiffies);
		mod_timer(&dom->period_timer, dom->period_time);
	}
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                30   44.78%        2      33.33%
Jan Kara                 24   35.82%        1      16.67%
Peter Zijlstra           12   17.91%        2      33.33%
Steven Rostedt            1    1.49%        1      16.67%
Total                    67  100.00%        6     100.00%

/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __wb_writeout_inc(struct bdi_writeback *wb)
{
	struct wb_domain *cgdom;

	inc_wb_stat(wb, WB_WRITTEN);
	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
			       wb->bdi->max_prop_frac);

	cgdom = mem_cgroup_wb_domain(wb);
	if (cgdom)
		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
				       wb->bdi->max_prop_frac);
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                34   50.00%        2      28.57%
Peter Zijlstra           20   29.41%        2      28.57%
Jan Kara                 13   19.12%        2      28.57%
Nikolay Borisov           1    1.47%        1      14.29%
Total                    68  100.00%        7     100.00%


void wb_writeout_inc(struct bdi_writeback *wb) { unsigned long flags; local_irq_save(flags); __wb_writeout_inc(wb); local_irq_restore(flags); }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Miklos Szeredi           24   82.76%        1      50.00%
Tejun Heo                 5   17.24%        1      50.00%
Total                    29  100.00%        2     100.00%

EXPORT_SYMBOL_GPL(wb_writeout_inc);

/*
 * On idle system, we can be called long after we scheduled because we use
 * deferred timers so count with missed periods.
 */
static void writeout_period(struct timer_list *t)
{
	struct wb_domain *dom = from_timer(dom, t, period_timer);
	int miss_periods = (jiffies - dom->period_time) /
						 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
		dom->period_time = wp_next_time(dom->period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&dom->period_timer, dom->period_time);
	} else {
		/*
		 * Aging has zeroed all fractions. Stop wasting CPU on period
		 * updates.
		 */
		dom->period_time = 0;
	}
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Jan Kara                 53   58.24%        1      33.33%
Tejun Heo                28   30.77%        1      33.33%
Kees Cook                10   10.99%        1      33.33%
Total                    91  100.00%        3     100.00%


int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
{
	memset(dom, 0, sizeof(*dom));

	spin_lock_init(&dom->lock);

	timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);

	dom->dirty_limit_tstamp = jiffies;

	return fprop_global_init(&dom->completions, gfp);
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                58   92.06%        2      50.00%
Geliang Tang              3    4.76%        1      25.00%
Kees Cook                 2    3.17%        1      25.00%
Total                    63  100.00%        4     100.00%

#ifdef CONFIG_CGROUP_WRITEBACK
void wb_domain_exit(struct wb_domain *dom) { del_timer_sync(&dom->period_timer); fprop_global_destroy(&dom->completions); }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                26  100.00%        1     100.00%
Total                    26  100.00%        1     100.00%
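
A minimal usage sketch of the two lifetime helpers above, assuming CONFIG_CGROUP_WRITEBACK so wb_domain_exit() is available; example_dom and the example_* functions are hypothetical:

static struct wb_domain example_dom;

static int example_domain_setup(void)
{
	int err;

	err = wb_domain_init(&example_dom, GFP_KERNEL);
	if (err)
		return err;
	/* domain live: completions age via the deferrable period timer */
	return 0;
}

static void example_domain_teardown(void)
{
	/* stops the period timer and frees the fprop state */
	wb_domain_exit(&example_dom);
}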

#endif	/* CONFIG_CGROUP_WRITEBACK */

/*
 * bdi_min_ratio keeps the sum of the minimum dirty shares of all
 * registered backing devices, which, for obvious reasons, can not
 * exceed 100%.
 */
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Peter Zijlstra           84   97.67%        2      66.67%
Jens Axboe                2    2.33%        1      33.33%
Total                    86  100.00%        3     100.00%


int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_bh(&bdi_lock);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Peter Zijlstra           76   96.20%        2      50.00%
Jens Axboe                2    2.53%        1      25.00%
Jan Kara                  1    1.27%        1      25.00%
Total                    79  100.00%        4     100.00%

EXPORT_SYMBOL(bdi_set_max_ratio);
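
An illustrative use of the two setters, with hypothetical fast_bdi/slow_bdi pointers:

/*
 *	bdi_set_min_ratio(fast_bdi, 10);   guarantee at least a 10% share
 *	bdi_set_max_ratio(slow_bdi, 30);   cap the device at a 30% share
 *
 * -EINVAL is returned when max_ratio would exceed 100, when min_ratio
 * would exceed the bdi's max_ratio, or when the sum of all registered
 * min ratios would reach 100.
 */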
static unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { return (thresh + bg_thresh) / 2; }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Fengguang Wu             24  100.00%        1     100.00%
Total                    24  100.00%        1     100.00%


static unsigned long hard_dirty_limit(struct wb_domain *dom, unsigned long thresh) { return max(thresh, dom->dirty_limit); }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Fengguang Wu             18   69.23%        1      33.33%
Tejun Heo                 8   30.77%        2      66.67%
Total                    26  100.00%        3     100.00%

/*
 * Memory which can be further allocated to a memcg domain is capped by
 * system-wide clean memory excluding the amount being used in the domain.
 */
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
			    unsigned long filepages, unsigned long headroom)
{
	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
	unsigned long clean = filepages - min(filepages, mdtc->dirty);
	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
	unsigned long other_clean = global_clean - min(global_clean, clean);

	mdtc->avail = filepages + min(headroom, other_clean);
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                83   93.26%        2      66.67%
Fengguang Wu              6    6.74%        1      33.33%
Total                    89  100.00%        3     100.00%
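
A worked example of the clamping, with illustrative numbers:

/*
 * Illustrative: filepages = 50000, mdtc->dirty = 10000, headroom = 100000,
 * gdtc->avail = 480000, gdtc->dirty = 60000:
 *
 *	clean        = 50000  - 10000              = 40000
 *	global_clean = 480000 - 60000              = 420000
 *	other_clean  = 420000 - 40000              = 380000
 *	mdtc->avail  = 50000  + min(100000, 380000) = 150000
 *
 * i.e. the memcg domain may use its own file pages plus whatever headroom
 * its memory limit allows, but never more than the clean memory left over
 * system-wide.
 */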

/**
 * __wb_calc_thresh - @wb's share of dirty throttling threshold
 * @dtc: dirty_throttle_context of interest
 *
 * Returns @wb's dirty limit in pages. The term "dirty" in the context of
 * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
 *
 * Note that balance_dirty_pages() will only seriously take it as a hard limit
 * when sleeping max_pause per page is not enough to keep the dirty pages under
 * control. For example, when the device is completely stalled due to some error
 * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
 * In the other normal situations, it acts more gently by throttling the tasks
 * more (rather than completely block them) when the wb dirty pages go high.
 *
 * It allocates high/low dirty limits to fast/slow devices, in order to prevent
 * - starving fast devices
 * - piling up dirty pages (that will take long time to sync) on slow devices
 *
 * The wb's share of dirty limit will be adapting to its throughput and
 * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
 */
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	u64 wb_thresh;
	long numerator, denominator;
	unsigned long wb_min_ratio, wb_max_ratio;

	/*
	 * Calculate this BDI's share of the thresh ratio.
	 */
	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
			      &numerator, &denominator);

	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
	wb_thresh *= numerator;
	do_div(wb_thresh, denominator);

	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);

	wb_thresh += (thresh * wb_min_ratio) / 100;
	if (wb_thresh > (thresh * wb_max_ratio) / 100)
		wb_thresh = thresh * wb_max_ratio / 100;

	return wb_thresh;
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                66   49.25%        6      54.55%
Peter Zijlstra           59   44.03%        3      27.27%
Fengguang Wu              8    5.97%        1       9.09%
Andrew Morton             1    0.75%        1       9.09%
Total                   134  100.00%       11     100.00%
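
A worked example of the proportional split, with illustrative numbers:

/*
 * Illustrative: thresh = 200000 pages, bdi_min_ratio = 0 and this wb has
 * completed 1/4 of the recently aged writeouts (numerator/denominator = 1/4):
 *
 *	wb_thresh = 200000 * (100 - 0) / 100 = 200000
 *	wb_thresh = 200000 * 1 / 4           = 50000 pages
 *
 * With wb_min_ratio = 5 this gains 200000 * 5 / 100 = 10000 -> 60000 pages;
 * with wb_max_ratio = 20 it would instead be capped at 40000 pages.
 */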


unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh) { struct dirty_throttle_control gdtc = { GDTC_INIT(wb), .thresh = thresh }; return __wb_calc_thresh(&gdtc); }

Contributors
Person               Tokens     Prop  Commits  CommitProp
Tejun Heo                36   94.74%        1      33.33%
Andrew Morton             1    2.63%        1      33.33%
Fengguang Wu              1    2.63%        1      33.33%
Total                    38  100.00%        3     100.00%

/*
 *                           setpoint - dirty 3
 *        f(dirty) := 1.0 + (----------------)
 *                           limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx      <= 0   => negative feedback control
 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
 *     => fast response on large errors; small oscillation near setpoint
 */
static long long pos_ratio_polynom(unsigned long setpoint,
				   unsigned long dirty,
				   unsigned long limit)
{
	long long pos_ratio;
	long x;

	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}

Contributors
Person               Tokens     Prop  Commits  CommitProp
Maxim Patlasov           87   95.60%        1      33.33%
Tejun Heo                 3    3.30%        1      33.33%
Rik Van Riel              1    1.10%        1      33.33%
Total                    91  100.00%        3     100.00%
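
A worked evaluation in the RATELIMIT_CALC_SHIFT fixed-point format (1.0 == 1 << 10 == 1024):

/*
 * Illustrative: the function computes f(dirty) = 1.0 + x^3 with
 * x = (setpoint - dirty) / (limit - setpoint):
 *
 *	dirty == setpoint -> x =  0 -> pos_ratio = 1024  (1.0)
 *	dirty == limit    -> x = -1 -> pos_ratio =    0  (0.0)
 *	dirty == freerun, when the setpoint sits midway between freerun and
 *	limit (as in the callers) -> x = 1 -> pos_ratio = 2048 (2.0)
 *
 * The result is clamped to [0, 2.0] so extreme inputs cannot produce a
 * negative or runaway ratelimit.
 */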

/*
 * Dirty position control.
 *
 * (o) global/bdi setpoints
 *
 * We want the dirty pages be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence task dirty ratelimit) will be
 * decreased/increased to bring the dirty pages back to the setpoint.
 *
 *	pos_ratio = 1 << RATELIMIT_CALC_SHIFT
 *
 *	if (dirty < setpoint) scale up   pos_ratio
 *	if (dirty > setpoint) scale down pos_ratio
 *
 *	if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *	if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 *	task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 * (o) global control line
 *
 *	[plot of pos_ratio over the global dirty control scope: 2.0 at the
 *	 freerun ceiling, 1.0 at the setpoint, falling to 0 at the limit]
 *
 * (o) wb control line
 *
 *	[plot of pos_ratio over wb_dirty: 1.0 at wb_setpoint, falling to a
 *	 floor of 1/4 past x_intercept]
 *
 * The wb control line won't drop below pos_ratio=1/4, so that wb_dirty can
 * be smoothly throttled down to normal if it starts high in situations like
 * - start writing to a slow SD card and a fast disk at the same time. The SD
 *   card's wb_dirty may rush to many times higher than wb_setpoint.
 * - the wb dirty thresh drops quickly due to change of JBOD workload
 */
static void wb_position_ratio(struct dirty_throttle_control *dtc) { struct bdi_writeback *wb = dtc->wb; unsigned long write_bw = wb->avg_write_bandwidth; unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh)