cregit-Linux how code gets into the kernel

Release 4.8 mm/memcontrol.c

/* memcontrol.c - Memory Controller
 *
 * Copyright IBM Corporation, 2007
 * Author Balbir Singh <balbir@linux.vnet.ibm.com>
 *
 * Copyright 2007 OpenVZ SWsoft Inc
 * Author: Pavel Emelianov <xemul@openvz.org>
 *
 * Memory thresholds
 * Copyright (C) 2009 Nokia Corporation
 * Author: Kirill A. Shutemov
 *
 * Kernel Memory Controller
 * Copyright (C) 2012 Parallels Inc. and Google Inc.
 * Authors: Glauber Costa and Suleiman Souhlal
 *
 * Native page reclaim
 * Charge lifetime sanitation
 * Lockless page tracking & accounting
 * Unified hierarchy configuration model
 * Copyright (C) 2015 Red Hat, Inc., Johannes Weiner
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/page_counter.h>
#include <linux/memcontrol.h>
#include <linux/cgroup.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp.h>
#include <linux/page-flags.h>
#include <linux/backing-dev.h>
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/vmpressure.h>
#include <linux/mm_inline.h>
#include <linux/swap_cgroup.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/lockdep.h>
#include <linux/file.h>
#include <linux/tracehook.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
#include "slab.h"

#include <asm/uaccess.h>

#include <trace/events/vmscan.h>


struct cgroup_subsys memory_cgrp_subsys __read_mostly;

EXPORT_SYMBOL(memory_cgrp_subsys);


struct mem_cgroup *root_mem_cgroup __read_mostly;


#define MEM_CGROUP_RECLAIM_RETRIES	5

/* Socket memory accounting disabled? */

static bool cgroup_memory_nosocket;

/* Kernel memory accounting disabled? */

static bool cgroup_memory_nokmem;

/* Whether the swap controller is active */
#ifdef CONFIG_MEMCG_SWAP

int do_swap_account __read_mostly;
#else

#define do_swap_account		0
#endif

/* Whether legacy memory+swap accounting is active */

static bool do_memsw_account(void)
{
        return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
johannes weiner  17  100.00%  1  100.00%
Total  17  100.00%  1  100.00%
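A minimal usage sketch (not code from this file): swap-uncharge callers pick the counter based on do_memsw_account(). The helper name example_uncharge_swap is hypothetical; page_counter_uncharge() and the memcg->memsw / memcg->swap counters are the real ones used elsewhere in memcontrol.c.

static void example_uncharge_swap(struct mem_cgroup *memcg,
                                  unsigned int nr_pages)
{
        if (do_memsw_account())
                /* cgroup v1: swap is part of the combined memory+swap limit */
                page_counter_uncharge(&memcg->memsw, nr_pages);
        else
                /* cgroup v2: swap has its own counter */
                page_counter_uncharge(&memcg->swap, nr_pages);
}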

static const char * const mem_cgroup_stat_names[] = {
        "cache",
        "rss",
        "rss_huge",
        "mapped_file",
        "dirty",
        "writeback",
        "swap",
};

static const char * const mem_cgroup_events_names[] = {
        "pgpgin",
        "pgpgout",
        "pgfault",
        "pgmajfault",
};

static const char * const mem_cgroup_lru_names[] = {
        "inactive_anon",
        "active_anon",
        "inactive_file",
        "active_file",
        "unevictable",
};

#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
#define NUMAINFO_EVENTS_TARGET 1024

/*
 * Cgroups above their limits are maintained in an RB-Tree, independent of
 * their hierarchy representation
 */
struct mem_cgroup_tree_per_node {
        struct rb_root rb_root;
        spinlock_t lock;
};

struct mem_cgroup_tree {
        struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
};

static struct mem_cgroup_tree soft_limit_tree __read_mostly;

/* for OOM */
struct mem_cgroup_eventfd_list {
        struct list_head list;
        struct eventfd_ctx *eventfd;
};

/*
 * cgroup_event represents events which userspace wants to receive.
 */
struct mem_cgroup_event {
        /*
         * memcg which the event belongs to.
         */
        struct mem_cgroup *memcg;
        /*
         * eventfd to signal userspace about the event.
         */
        struct eventfd_ctx *eventfd;
        /*
         * Each of these stored in a list by the cgroup.
         */
        struct list_head list;
        /*
         * register_event() callback will be used to add new userspace
         * waiter for changes related to this event. Use eventfd_signal()
         * on eventfd to send notification to userspace.
         */
        int (*register_event)(struct mem_cgroup *memcg,
                              struct eventfd_ctx *eventfd, const char *args);
        /*
         * unregister_event() callback will be called when userspace closes
         * the eventfd or on cgroup removal. This callback must be set
         * if you want to provide notification functionality.
         */
        void (*unregister_event)(struct mem_cgroup *memcg,
                                 struct eventfd_ctx *eventfd);
        /*
         * All fields below needed to unregister event when
         * userspace closes eventfd.
         */
        poll_table pt;
        wait_queue_head_t *wqh;
        wait_queue_t wait;
        struct work_struct remove;
};

static void mem_cgroup_threshold(struct mem_cgroup *memcg);
static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);

/* Stuff for moving charges at task migration. */
/*
 * Types of charges to be moved.
 */
#define MOVE_ANON	0x1U
#define MOVE_FILE	0x2U
#define MOVE_MASK	(MOVE_ANON | MOVE_FILE)

/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
        spinlock_t        lock; /* for from, to */
        struct mm_struct  *mm;
        struct mem_cgroup *from;
        struct mem_cgroup *to;
        unsigned long flags;
        unsigned long precharge;
        unsigned long moved_charge;
        unsigned long moved_swap;
        struct task_struct *moving_task;	/* a task moving charges */
        wait_queue_head_t waitq;		/* a waitq for other context */
} mc = {
        .lock = __SPIN_LOCK_UNLOCKED(mc.lock),
        .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
};

/*
 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
 * limit reclaim to prevent infinite loops, if they ever occur.
 */
#define MEM_CGROUP_MAX_RECLAIM_LOOPS		100
#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS	2

enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
        MEM_CGROUP_CHARGE_TYPE_ANON,
        MEM_CGROUP_CHARGE_TYPE_SWAPOUT,	/* for accounting swapcache */
        MEM_CGROUP_CHARGE_TYPE_DROP,	/* a page was unused swap cache */
        NR_CHARGE_TYPE,
};

/* for encoding cft->private value on file */
enum res_type {
        _MEM,
        _MEMSWAP,
        _OOM_TYPE,
        _KMEM,
        _TCP,
};

#define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
#define MEMFILE_TYPE(val)	((val) >> 16 & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)
/* Used for OOM notifier */
#define OOM_CONTROL		(0)

/* Some nice accessors for the vmpressure. */
struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
{
        if (!memcg)
                memcg = root_mem_cgroup;
        return &memcg->vmpressure;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
anton vorontsov  27  100.00%  1  100.00%
Total  27  100.00%  1  100.00%


struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
{
        return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
anton vorontsov  26  100.00%  1  100.00%
Total  26  100.00%  1  100.00%


static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
        return (memcg == root_mem_cgroup);
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
michal hocko  19  100.00%  1  100.00%
Total  19  100.00%  1  100.00%

#ifndef CONFIG_SLOB
/*
 * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
 * The main reason for not using cgroup id for this:
 *  this works better in sparse environments, where we have a lot of memcgs,
 *  but only a few kmem-limited. Or also, if we have, for instance, 200
 *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
 *  200 entry array for that.
 *
 * The current size of the caches array is stored in memcg_nr_cache_ids. It
 * will double each time we have to increase it.
 */
static DEFINE_IDA(memcg_cache_ida);
int memcg_nr_cache_ids;

/* Protects memcg_nr_cache_ids */
static DECLARE_RWSEM(memcg_cache_ids_sem);
void memcg_get_cache_ids(void)
{
        down_read(&memcg_cache_ids_sem);
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
glauber costa  7  53.85%  2  40.00%
johannes weiner  4  30.77%  1  20.00%
li zefan  1  7.69%  1  20.00%
daisuke nishimura  1  7.69%  1  20.00%
Total  13  100.00%  5  100.00%


void memcg_put_cache_ids(void)
{
        up_read(&memcg_cache_ids_sem);
}

Contributors

PersonTokensPropCommitsCommitProp
johannes weinerjohannes weiner861.54%133.33%
glauber costaglauber costa430.77%133.33%
li zefanli zefan17.69%133.33%
Total13100.00%3100.00%

/*
 * MIN_SIZE is different than 1, because we would like to avoid going through
 * the alloc/free process all the time. In a small machine, 4 kmem-limited
 * cgroups is a reasonable guess. In the future, it could be a parameter or
 * tunable, but that is strictly not necessary.
 *
 * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
 * this constant directly from cgroup, but it is understandable that this is
 * better kept as an internal representation in cgroup.c. In any case, the
 * cgrp_id space is not getting any smaller, and we don't have to necessarily
 * increase ours as well if it increases.
 */
#define MEMCG_CACHES_MIN_SIZE 4
#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX

/*
 * A lot of the calls to the cache allocation functions are expected to be
 * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
 * conditional to this static branch, we'll have to allow modules that do
 * kmem_cache_alloc and the such to see this symbol as well
 */
DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
EXPORT_SYMBOL(memcg_kmem_enabled_key);

#endif /* !CONFIG_SLOB */

/**
 * mem_cgroup_css_from_page - css of the memcg associated with a page
 * @page: page of interest
 *
 * If memcg is bound to the default hierarchy, css of the memcg associated
 * with @page is returned.  The returned css remains associated with @page
 * until it is released.
 *
 * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
 * is returned.
 */
struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
{
        struct mem_cgroup *memcg;

        memcg = page->mem_cgroup;

        if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
                memcg = root_mem_cgroup;

        return &memcg->css;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
johannes weiner  44  100.00%  3  100.00%
Total  44  100.00%  3  100.00%

/**
 * page_cgroup_ino - return inode number of the memcg a page is charged to
 * @page: the page
 *
 * Look up the closest online ancestor of the memory cgroup @page is charged to
 * and return its inode number or 0 if @page is not charged to any cgroup. It
 * is safe to call this function without holding a reference to @page.
 *
 * Note, this function is inherently racy, because there is nothing to prevent
 * the cgroup inode from getting torn down and potentially reallocated a moment
 * after page_cgroup_ino() returns, so it only should be used by callers that
 * do not care (such as procfs interfaces).
 */
ino_t page_cgroup_ino(struct page *page)
{
        struct mem_cgroup *memcg;
        unsigned long ino = 0;

        rcu_read_lock();
        memcg = READ_ONCE(page->mem_cgroup);
        while (memcg && !(memcg->css.flags & CSS_ONLINE))
                memcg = parent_mem_cgroup(memcg);
        if (memcg)
                ino = cgroup_ino(memcg->css.cgroup);
        rcu_read_unlock();
        return ino;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
johannes weiner  65  85.53%  3  60.00%
vladimir davydov  8  10.53%  1  20.00%
glauber costa  3  3.95%  1  20.00%
Total  76  100.00%  5  100.00%
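A short sketch of the kind of procfs consumer the comment above has in mind (modeled on /proc/kpagecgroup; the function name is hypothetical): map a pfn to its page and report the memcg inode, tolerating the documented raciness.

static u64 example_kpagecgroup_lookup(unsigned long pfn)
{
        u64 ino = 0;

        if (pfn_valid(pfn))
                ino = page_cgroup_ino(pfn_to_page(pfn));
        return ino;
}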


static struct mem_cgroup_per_node *
mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
{
        int nid = page_to_nid(page);

        return memcg->nodeinfo[nid];
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  20  58.82%  1  25.00%
jianyu zhan  8  23.53%  1  25.00%
johannes weiner  4  11.76%  1  25.00%
mel gorman  2  5.88%  1  25.00%
Total  34  100.00%  4  100.00%


static struct mem_cgroup_tree_per_node *
soft_limit_tree_node(int nid)
{
        return soft_limit_tree.rb_tree_per_node[nid];
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  14  73.68%  1  25.00%
tejun heo  2  10.53%  1  25.00%
mel gorman  2  10.53%  1  25.00%
johannes weiner  1  5.26%  1  25.00%
Total  19  100.00%  4  100.00%


static struct mem_cgroup_tree_per_node *
soft_limit_tree_from_page(struct page *page)
{
        int nid = page_to_nid(page);

        return soft_limit_tree.rb_tree_per_node[nid];
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  28  96.55%  1  50.00%
mel gorman  1  3.45%  1  50.00%
Total  29  100.00%  2  100.00%


static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
                                         struct mem_cgroup_tree_per_node *mctz,
                                         unsigned long new_usage_in_excess)
{
        struct rb_node **p = &mctz->rb_root.rb_node;
        struct rb_node *parent = NULL;
        struct mem_cgroup_per_node *mz_node;

        if (mz->on_tree)
                return;

        mz->usage_in_excess = new_usage_in_excess;
        if (!mz->usage_in_excess)
                return;
        while (*p) {
                parent = *p;
                mz_node = rb_entry(parent, struct mem_cgroup_per_node,
                                   tree_node);
                if (mz->usage_in_excess < mz_node->usage_in_excess)
                        p = &(*p)->rb_left;
                /*
                 * We can't avoid mem cgroups that are over their soft
                 * limit by the same amount
                 */
                else if (mz->usage_in_excess >= mz_node->usage_in_excess)
                        p = &(*p)->rb_right;
        }
        rb_link_node(&mz->tree_node, parent, p);
        rb_insert_color(&mz->tree_node, &mctz->rb_root);
        mz->on_tree = true;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  159  97.55%  1  50.00%
mel gorman  4  2.45%  1  50.00%
Total  163  100.00%  2  100.00%


static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
                                         struct mem_cgroup_tree_per_node *mctz)
{
        if (!mz->on_tree)
                return;
        rb_erase(&mz->tree_node, &mctz->rb_root);
        mz->on_tree = false;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  41  95.35%  1  50.00%
mel gorman  2  4.65%  1  50.00%
Total  43  100.00%  2  100.00%


static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
                                       struct mem_cgroup_tree_per_node *mctz)
{
        unsigned long flags;

        spin_lock_irqsave(&mctz->lock, flags);
        __mem_cgroup_remove_exceeded(mz, mctz);
        spin_unlock_irqrestore(&mctz->lock, flags);
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  35  74.47%  1  33.33%
johannes weiner  10  21.28%  1  33.33%
mel gorman  2  4.26%  1  33.33%
Total  47  100.00%  3  100.00%


static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
{
        unsigned long nr_pages = page_counter_read(&memcg->memory);
        unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
        unsigned long excess = 0;

        if (nr_pages > soft_limit)
                excess = nr_pages - soft_limit;

        return excess;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
johannes weiner  55  98.21%  1  50.00%
jason low  1  1.79%  1  50.00%
Total  56  100.00%  2  100.00%


static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
{
        unsigned long excess;
        struct mem_cgroup_per_node *mz;
        struct mem_cgroup_tree_per_node *mctz;

        mctz = soft_limit_tree_from_page(page);
        /*
         * Necessary to update all ancestors when hierarchy is used,
         * because their event counter is not touched.
         */
        for (; memcg; memcg = parent_mem_cgroup(memcg)) {
                mz = mem_cgroup_page_nodeinfo(memcg, page);
                excess = soft_limit_excess(memcg);
                /*
                 * We have to update the tree if mz is on RB-tree or
                 * mem is over its softlimit.
                 */
                if (excess || mz->on_tree) {
                        unsigned long flags;

                        spin_lock_irqsave(&mctz->lock, flags);
                        /* if on-tree, remove it */
                        if (mz->on_tree)
                                __mem_cgroup_remove_exceeded(mz, mctz);
                        /*
                         * Insert again. mz->usage_in_excess will be updated.
                         * If excess is 0, no tree ops.
                         */
                        __mem_cgroup_insert_exceeded(mz, mctz, excess);
                        spin_unlock_irqrestore(&mctz->lock, flags);
                }
        }
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  112  88.19%  1  20.00%
johannes weiner  11  8.66%  2  40.00%
mel gorman  3  2.36%  1  20.00%
jianyu zhan  1  0.79%  1  20.00%
Total  127  100.00%  5  100.00%


static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
{
        struct mem_cgroup_tree_per_node *mctz;
        struct mem_cgroup_per_node *mz;
        int nid;

        for_each_node(nid) {
                mz = mem_cgroup_nodeinfo(memcg, nid);
                mctz = soft_limit_tree_node(nid);
                mem_cgroup_remove_exceeded(mz, mctz);
        }
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  31  58.49%  1  20.00%
jianyu zhan  8  15.09%  1  20.00%
mel gorman  7  13.21%  1  20.00%
balbir singh  6  11.32%  1  20.00%
raghavendra k t* (same as raghavendra koushik)  1  1.89%  1  20.00%
Total  53  100.00%  5  100.00%


static struct mem_cgroup_per_node *
__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
{
        struct rb_node *rightmost = NULL;
        struct mem_cgroup_per_node *mz;

retry:
        mz = NULL;
        rightmost = rb_last(&mctz->rb_root);
        if (!rightmost)
                goto done;		/* Nothing to reclaim from */

        mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node);
        /*
         * Remove the node now but someone else can add it back,
         * we will add it back at the end of reclaim to its correct
         * position in the tree.
         */
        __mem_cgroup_remove_exceeded(mz, mctz);
        if (!soft_limit_excess(mz->memcg) ||
            !css_tryget_online(&mz->memcg->css))
                goto retry;
done:
        return mz;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  74  74.75%  1  14.29%
balbir singh  16  16.16%  1  14.29%
mel gorman  4  4.04%  1  14.29%
michal hocko  2  2.02%  1  14.29%
tejun heo  1  1.01%  1  14.29%
johannes weiner  1  1.01%  1  14.29%
hugh dickins  1  1.01%  1  14.29%
Total  99  100.00%  7  100.00%


static struct mem_cgroup_per_node *
mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
{
        struct mem_cgroup_per_node *mz;

        spin_lock_irq(&mctz->lock);
        mz = __mem_cgroup_largest_soft_limit_node(mctz);
        spin_unlock_irq(&mctz->lock);
        return mz;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
andrew morton  23  52.27%  1  20.00%
balbir singh  14  31.82%  1  20.00%
mel gorman  3  6.82%  1  20.00%
michal hocko  2  4.55%  1  20.00%
johannes weiner  2  4.55%  1  20.00%
Total  44  100.00%  5  100.00%

/*
 * Return the page count for a single (non-recursive) @memcg.
 *
 * Implementation note: reading percpu statistics for memcg.
 *
 * Both vmstat[] and percpu_counter use thresholds and periodic
 * synchronization to implement a "quick" read. There is a trade-off between
 * reading cost and precision of the value, so we could implement a similar
 * periodic synchronization for memcg's counters.
 *
 * But this _read() function is currently used for the user interface. Users
 * account memory through the memory cgroup and always require an exact value,
 * so even a quick-and-fuzzy read would still have to visit all online CPUs
 * and sum their values. For now, the extra synchronization is therefore not
 * implemented (it is only implemented for cpu hotplug).
 *
 * If kernel-internal users appear that can make use of a not-exact value, and
 * reading all per-cpu values becomes a performance bottleneck in some common
 * workload, thresholds and synchronization as in vmstat[] should be
 * implemented.
 */
static unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg,
                                          enum mem_cgroup_stat_index idx)
{
        long val = 0;
        int cpu;

        /* Per-cpu values can be negative, use a signed accumulator */
        for_each_possible_cpu(cpu)
                val += per_cpu(memcg->stat->count[idx], cpu);
        /*
         * Summing races with updates, so val may be negative.  Avoid exposing
         * transient negative values.
         */
        if (val < 0)
                val = 0;
        return val;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
kamezawa hiroyuki  38  64.41%  1  20.00%
greg thelen  13  22.03%  1  20.00%
johannes weiner  5  8.47%  1  20.00%
raghavendra k t* (same as raghavendra koushik)  2  3.39%  1  20.00%
tejun heo  1  1.69%  1  20.00%
Total  59  100.00%  5  100.00%
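A hedged illustration of how these per-cpu sums reach userspace (a sketch in the spirit of the memory.stat handler later in this file; the function name is hypothetical): each row is a mem_cgroup_read_stat() result converted from pages to bytes.

static void example_show_basic_stats(struct mem_cgroup *memcg, struct seq_file *m)
{
        seq_printf(m, "cache %llu\n",
                   (u64)mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE) * PAGE_SIZE);
        seq_printf(m, "rss %llu\n",
                   (u64)mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS) * PAGE_SIZE);
}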


static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
                                            enum mem_cgroup_events_index idx)
{
        unsigned long val = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                val += per_cpu(memcg->stat->events[idx], cpu);
        return val;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
johannes weiner  45  93.75%  1  33.33%
raghavendra k t* (same as raghavendra koushik)  2  4.17%  1  33.33%
tejun heo  1  2.08%  1  33.33%
Total  48  100.00%  3  100.00%


static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
                                         struct page *page,
                                         bool compound, int nr_pages)
{
        /*
         * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
         * counted as CACHE even if it's on ANON LRU.
         */
        if (PageAnon(page))
                __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
                               nr_pages);
        else
                __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
                               nr_pages);

        if (compound) {
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
                               nr_pages);
        }

        /* pagein of a big page is an event. So, ignore page size */
        if (nr_pages > 0)
                __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
        else {
                __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
                nr_pages = -nr_pages; /* for event */
        }

        __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
kamezawa hiroyuki  54  38.30%  5  35.71%
balbir singh  27  19.15%  1  7.14%
david rientjes  25  17.73%  1  7.14%
kirill a. shutemov  17  12.06%  2  14.29%
johannes weiner  9  6.38%  3  21.43%
raghavendra k t* (same as raghavendra koushik)  6  4.26%  1  7.14%
kosaki motohiro  3  2.13%  1  7.14%
Total  141  100.00%  14  100.00%
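For context, a sketch of how the statistics and event hooks pair up on the charge path (modeled on mem_cgroup_commit_charge() later in the file; the wrapper name is hypothetical): the per-cpu counters are updated with interrupts disabled, then the threshold/soft-limit event machinery runs.

static void example_account_charge(struct mem_cgroup *memcg, struct page *page,
                                   bool compound, int nr_pages)
{
        local_irq_disable();
        mem_cgroup_charge_statistics(memcg, page, compound, nr_pages);
        memcg_check_events(memcg, page);
        local_irq_enable();
}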


unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                           int nid, unsigned int lru_mask)
{
        unsigned long nr = 0;
        struct mem_cgroup_per_node *mz;
        enum lru_list lru;

        VM_BUG_ON((unsigned)nid >= nr_node_ids);

        for_each_lru(lru) {
                if (!(BIT(lru) & lru_mask))
                        continue;
                mz = mem_cgroup_nodeinfo(memcg, nid);
                nr += mz->lru_size[lru];
        }
        return nr;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
kamezawa hiroyuki  38  45.78%  2  22.22%
jianyu zhan  20  24.10%  1  11.11%
mel gorman  12  14.46%  1  11.11%
hugh dickins  7  8.43%  2  22.22%
ying han  3  3.61%  1  11.11%
konstantin khlebnikov  2  2.41%  1  11.11%
raghavendra k t* (same as raghavendra koushik)  1  1.20%  1  11.11%
Total  83  100.00%  9  100.00%
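The lru_mask argument above is a bitmask of enum lru_list values. A minimal sketch (hypothetical helper name): counting the file-backed LRU pages of one node by OR-ing the two file LRU bits.

static unsigned long example_node_file_pages(struct mem_cgroup *memcg, int nid)
{
        unsigned int mask = BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE);

        return mem_cgroup_node_nr_lru_pages(memcg, nid, mask);
}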


static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
                                             unsigned int lru_mask)
{
        unsigned long nr = 0;
        int nid;

        for_each_node_state(nid, N_MEMORY)
                nr += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
        return nr;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
ying han  26  57.78%  1  20.00%
jianyu zhan  8  17.78%  1  20.00%
kamezawa hiroyuki  8  17.78%  1  20.00%
raghavendra k t* (same as raghavendra koushik)  2  4.44%  1  20.00%
lai jiangshan  1  2.22%  1  20.00%
Total  45  100.00%  5  100.00%


static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
                                       enum mem_cgroup_events_target target)
{
        unsigned long val, next;

        val = __this_cpu_read(memcg->stat->nr_page_events);
        next = __this_cpu_read(memcg->stat->targets[target]);
        /* from time_after() in jiffies.h */
        if ((long)next - (long)val < 0) {
                switch (target) {
                case MEM_CGROUP_TARGET_THRESH:
                        next = val + THRESHOLDS_EVENTS_TARGET;
                        break;
                case MEM_CGROUP_TARGET_SOFTLIMIT:
                        next = val + SOFTLIMIT_EVENTS_TARGET;
                        break;
                case MEM_CGROUP_TARGET_NUMAINFO:
                        next = val + NUMAINFO_EVENTS_TARGET;
                        break;
                default:
                        break;
                }
                __this_cpu_write(memcg->stat->targets[target], next);
                return true;
        }
        return false;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
johannes weiner  72  59.50%  4  44.44%
kamezawa hiroyuki  32  26.45%  2  22.22%
andrew morton  10  8.26%  1  11.11%
raghavendra k t* (same as raghavendra koushik)  4  3.31%  1  11.11%
steven rostedt  3  2.48%  1  11.11%
Total  121  100.00%  9  100.00%
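The "(long)next - (long)val < 0" test above is the wrap-safe comparison borrowed from time_after(). A tiny sketch of the same idiom in isolation (hypothetical helper):

static bool counter_reached_target(unsigned long val, unsigned long next)
{
        /* signed difference stays correct even after the counter wraps */
        return (long)next - (long)val < 0;
}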

/*
 * Check events in order.
 */
static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
        /* threshold event is triggered in finer grain than soft limit */
        if (unlikely(mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_THRESH))) {
                bool do_softlimit;
                bool do_numainfo __maybe_unused;

                do_softlimit = mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_SOFTLIMIT);
#if MAX_NUMNODES > 1
                do_numainfo = mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_NUMAINFO);
#endif
                mem_cgroup_threshold(memcg);
                if (unlikely(do_softlimit))
                        mem_cgroup_update_tree(memcg, page);
#if MAX_NUMNODES > 1
                if (unlikely(do_numainfo))
                        atomic_inc(&memcg->numainfo_events);
#endif
        }
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
kamezawa hiroyuki  47  45.19%  2  28.57%
andrew morton  27  25.96%  2  28.57%
johannes weiner  26  25.00%  2  28.57%
raghavendra k t* (same as raghavendra koushik)  4  3.85%  1  14.29%
Total  104  100.00%  7  100.00%


struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
        /*
         * mm_update_next_owner() may clear mm->owner to NULL
         * if it races with swapoff, page migration, etc.
         * So this can be called with p == NULL.
         */
        if (unlikely(!p))
                return NULL;

        return mem_cgroup_from_css(task_css(p, memory_cgrp_id));
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
pavel emelianov  20  57.14%  1  20.00%
balbir singh  12  34.29%  1  20.00%
tejun heo  2  5.71%  2  40.00%
wanpeng li  1  2.86%  1  20.00%
Total  35  100.00%  5  100.00%

EXPORT_SYMBOL(mem_cgroup_from_task);
static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
        struct mem_cgroup *memcg = NULL;

        rcu_read_lock();
        do {
                /*
                 * Page cache insertions can happen without an
                 * actual mm context, e.g. during disk probing
                 * on boot, loopback IO, acct() writes etc.
                 */
                if (unlikely(!mm))
                        memcg = root_mem_cgroup;
                else {
                        memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
                        if (unlikely(!memcg))
                                memcg = root_mem_cgroup;
                }
        } while (!css_tryget_online(&memcg->css));
        rcu_read_unlock();
        return memcg;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
kamezawa hiroyuki  56  66.67%  1  20.00%
michal hocko  16  19.05%  1  20.00%
johannes weiner  6  7.14%  1  20.00%
raghavendra k t* (same as raghavendra koushik)  5  5.95%  1  20.00%
tejun heo  1  1.19%  1  20.00%
Total  84  100.00%  5  100.00%
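A sketch of the lookup/release discipline around this helper (hypothetical caller; do_something() is a placeholder): the returned memcg carries a css reference taken via css_tryget_online(), so the caller must drop it with css_put().

static void example_with_mm_memcg(struct mm_struct *mm)
{
        struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);

        do_something(memcg);		/* placeholder for real work */
        css_put(&memcg->css);
}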

/**
 * mem_cgroup_iter - iterate over memory cgroup hierarchy
 * @root: hierarchy root
 * @prev: previously returned memcg, NULL on first invocation
 * @reclaim: cookie for shared reclaim walks, NULL for full walks
 *
 * Returns references to children of the hierarchy below @root, or
 * @root itself, or %NULL after a full round-trip.
 *
 * Caller must pass the return value in @prev on subsequent
 * invocations for reference counting, or use mem_cgroup_iter_break()
 * to cancel a hierarchy walk before the round-trip is complete.
 *
 * Reclaimers can specify a zone and a priority level in @reclaim to
 * divide up the memcgs in the hierarchy among all concurrent
 * reclaimers operating on the same zone and priority.
 */
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                                   struct mem_cgroup *prev,
                                   struct mem_cgroup_reclaim_cookie *reclaim)
{
        struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
        struct cgroup_subsys_state *css = NULL;
        struct mem_cgroup *memcg = NULL;
        struct mem_cgroup *pos = NULL;

        if (mem_cgroup_disabled())
                return NULL;

        if (!root)
                root = root_mem_cgroup;

        if (prev && !reclaim)
                pos = prev;

        if (!root->use_hierarchy && root != root_mem_cgroup) {
                if (prev)
                        goto out;
                return root;
        }

        rcu_read_lock();

        if (reclaim) {
                struct mem_cgroup_per_node *mz;

                mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
                iter = &mz->iter[reclaim->priority];

                if (prev && reclaim->generation != iter->generation)
                        goto out_unlock;

                while (1) {
                        pos = READ_ONCE(iter->position);
                        if (!pos || css_tryget(&pos->css))
                                break;
                        /*
                         * css reference reached zero, so iter->position will
                         * be cleared by ->css_released. However, we should not
                         * rely on this happening soon, because ->css_released
                         * is called from a work queue, and by busy-waiting we
                         * might block it. So we clear iter->position right
                         * away.
                         */
                        (void)cmpxchg(&iter->position, pos, NULL);
                }
        }

        if (pos)
                css = &pos->css;

        for (;;) {
                css = css_next_descendant_pre(css, &root->css);
                if (!css) {
                        /*
                         * Reclaimers share the hierarchy walk, and a
                         * new one might jump in right at the end of
                         * the hierarchy - make sure they see at least
                         * one group and restart from the beginning.
                         */
                        if (!prev)
                                continue;
                        break;
                }

                /*
                 * Verify the css and acquire a reference.  The root
                 * is provided by the caller, so we know it's alive
                 * and kicking, and don't take an extra reference.
                 */
                memcg = mem_cgroup_from_css(css);

                if (css == &root->css)
                        break;

                if (css_tryget(css))
                        break;

                memcg = NULL;
        }

        if (reclaim) {
                /*
                 * The position could have already been updated by a competing
                 * thread, so check that the value hasn't changed since we read
                 * it to avoid reclaiming from the same cgroup twice.
                 */
                (void)cmpxchg(&iter->position, pos, memcg);

                if (pos)
                        css_put(&pos->css);

                if (!memcg)
                        iter->generation++;
                else if (!prev)
                        reclaim->generation = iter->generation;
        }

out_unlock:
        rcu_read_unlock();
out:
        if (prev && prev != root)
                css_put(&prev->css);

        return memcg;
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
johannes weiner  225  62.50%  7  30.43%
michal hocko  80  22.22%  7  30.43%
vladimir davydov  29  8.06%  1  4.35%
kamezawa hiroyuki  15  4.17%  2  8.70%
mel gorman  5  1.39%  1  4.35%
jianyu zhan  2  0.56%  1  4.35%
raghavendra k t* (same as raghavendra koushik)  1  0.28%  1  4.35%
jason low  1  0.28%  1  4.35%
tejun heo  1  0.28%  1  4.35%
andrew morton  1  0.28%  1  4.35%
Total  360  100.00%  23  100.00%
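A minimal sketch of the full-walk usage contract described in the comment above (essentially the pattern behind the for_each_mem_cgroup_tree() helper used later in the file; visit() is a placeholder): each returned memcg is either passed back as @prev or released early with mem_cgroup_iter_break().

static void example_walk_hierarchy(struct mem_cgroup *root)
{
        struct mem_cgroup *iter;

        for (iter = mem_cgroup_iter(root, NULL, NULL);
             iter;
             iter = mem_cgroup_iter(root, iter, NULL)) {
                if (!visit(iter)) {		/* placeholder predicate */
                        mem_cgroup_iter_break(root, iter);
                        break;
                }
        }
}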

/**
 * mem_cgroup_iter_break - abort a hierarchy walk prematurely
 * @root: hierarchy root
 * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
 */
void mem_cgroup_iter_break(struct mem_cgroup *root,
                           struct mem_cgroup *prev)
{
        if (!root)
                root = root_mem_cgroup;
        if (prev && prev != root)
                css_put(&prev->css);
}

Contributors

Person  Tokens  Prop  Commits  CommitProp
kamezawa hiroyuki  20  50.00%  3  50.00%
michal hocko  13  32.50%  1  16.67%
johannes weiner  4  10.00%  1  16.67%
ying han  3  7.50%  1  16.67%
Total  40  100.00%  6  100.00%


static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
{
        struct mem_cgroup *memcg = dead_memcg;
        struct mem_cgroup_reclaim_iter *iter;
        struct mem_cgroup_per_node *mz;
        int nid;
        int i;

        while (