Author | Tokens | Token Proportion | Commits | Commit Proportion |
---|---|---|---|---|
Roman Gushchin | 3058 | 44.64% | 5 | 17.24% |
David Finkel | 1605 | 23.43% | 1 | 3.45% |
Mike Rapoport | 889 | 12.98% | 2 | 6.90% |
Jay Kamat | 500 | 7.30% | 1 | 3.45% |
Yosry Ahmed | 221 | 3.23% | 5 | 17.24% |
Michal Koutný | 191 | 2.79% | 3 | 10.34% |
Shakeel Butt | 157 | 2.29% | 1 | 3.45% |
David Vernet | 106 | 1.55% | 4 | 13.79% |
Ivan Orlov | 63 | 0.92% | 1 | 3.45% |
Alex Shi | 27 | 0.39% | 1 | 3.45% |
haifeng.xu | 25 | 0.36% | 2 | 6.90% |
Christophe Jaillet | 4 | 0.06% | 1 | 3.45% |
Shuah Khan | 3 | 0.04% | 1 | 3.45% |
Tianchen Ding | 2 | 0.03% | 1 | 3.45% |
Total | 6851 | 100.00% | 29 | 100.00% |
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607
/* SPDX-License-Identifier: GPL-2.0 */ #define _GNU_SOURCE #include <linux/limits.h> #include <linux/oom.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> #include <sys/socket.h> #include <sys/wait.h> #include <arpa/inet.h> #include <netinet/in.h> #include <netdb.h> #include <errno.h> #include <sys/mman.h> #include "../kselftest.h" #include "cgroup_util.h" static bool has_localevents; static bool has_recursiveprot; /* * This test creates two nested cgroups with and without enabling * the memory controller. */ static int test_memcg_subtree_control(const char *root) { char *parent, *child, *parent2 = NULL, *child2 = NULL; int ret = KSFT_FAIL; char buf[PAGE_SIZE]; /* Create two nested cgroups with the memory controller enabled */ parent = cg_name(root, "memcg_test_0"); child = cg_name(root, "memcg_test_0/memcg_test_1"); if (!parent || !child) goto cleanup_free; if (cg_create(parent)) goto cleanup_free; if (cg_write(parent, "cgroup.subtree_control", "+memory")) goto cleanup_parent; if (cg_create(child)) goto cleanup_parent; if (cg_read_strstr(child, "cgroup.controllers", "memory")) goto cleanup_child; /* Create two nested cgroups without enabling memory controller */ parent2 = cg_name(root, "memcg_test_1"); child2 = cg_name(root, "memcg_test_1/memcg_test_1"); if (!parent2 || !child2) goto cleanup_free2; if (cg_create(parent2)) goto cleanup_free2; if (cg_create(child2)) goto cleanup_parent2; if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) goto cleanup_all; if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) goto cleanup_all; ret = KSFT_PASS; cleanup_all: cg_destroy(child2); cleanup_parent2: cg_destroy(parent2); cleanup_free2: free(parent2); free(child2); cleanup_child: cg_destroy(child); cleanup_parent: cg_destroy(parent); cleanup_free: free(parent); free(child); return ret; } static int alloc_anon_50M_check(const char *cgroup, void *arg) { size_t size = 
MB(50); char *buf, *ptr; long anon, current; int ret = -1; buf = malloc(size); if (buf == NULL) { fprintf(stderr, "malloc() failed\n"); return -1; } for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) *ptr = 0; current = cg_read_long(cgroup, "memory.current"); if (current < size) goto cleanup; if (!values_close(size, current, 3)) goto cleanup; anon = cg_read_key_long(cgroup, "memory.stat", "anon "); if (anon < 0) goto cleanup; if (!values_close(anon, current, 3)) goto cleanup; ret = 0; cleanup: free(buf); return ret; } static int alloc_pagecache_50M_check(const char *cgroup, void *arg) { size_t size = MB(50); int ret = -1; long current, file; int fd; fd = get_temp_fd(); if (fd < 0) return -1; if (alloc_pagecache(fd, size)) goto cleanup; current = cg_read_long(cgroup, "memory.current"); if (current < size) goto cleanup; file = cg_read_key_long(cgroup, "memory.stat", "file "); if (file < 0) goto cleanup; if (!values_close(file, current, 10)) goto cleanup; ret = 0; cleanup: close(fd); return ret; } /* * This test create a memory cgroup, allocates * some anonymous memory and some pagecache * and checks memory.current, memory.peak, and some memory.stat values. 
*/ static int test_memcg_current_peak(const char *root) { int ret = KSFT_FAIL; long current, peak, peak_reset; char *memcg; bool fd2_closed = false, fd3_closed = false, fd4_closed = false; int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1; struct stat ss; memcg = cg_name(root, "memcg_test"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; current = cg_read_long(memcg, "memory.current"); if (current != 0) goto cleanup; peak = cg_read_long(memcg, "memory.peak"); if (peak != 0) goto cleanup; if (cg_run(memcg, alloc_anon_50M_check, NULL)) goto cleanup; peak = cg_read_long(memcg, "memory.peak"); if (peak < MB(50)) goto cleanup; /* * We'll open a few FDs for the same memory.peak file to exercise the free-path * We need at least three to be closed in a different order than writes occurred to test * the linked-list handling. */ peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); if (peak_fd == -1) { if (errno == ENOENT) ret = KSFT_SKIP; goto cleanup; } /* * Before we try to use memory.peak's fd, try to figure out whether * this kernel supports writing to that file in the first place. 
(by * checking the writable bit on the file's st_mode) */ if (fstat(peak_fd, &ss)) goto cleanup; if ((ss.st_mode & S_IWUSR) == 0) { ret = KSFT_SKIP; goto cleanup; } peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); if (peak_fd2 == -1) goto cleanup; peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); if (peak_fd3 == -1) goto cleanup; /* any non-empty string resets, but make it clear */ static const char reset_string[] = "reset\n"; peak_reset = write(peak_fd, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; peak_reset = write(peak_fd2, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; peak_reset = write(peak_fd3, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; /* Make sure a completely independent read isn't affected by our FD-local reset above*/ peak = cg_read_long(memcg, "memory.peak"); if (peak < MB(50)) goto cleanup; fd2_closed = true; if (close(peak_fd2)) goto cleanup; peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); if (peak_fd4 == -1) goto cleanup; peak_reset = write(peak_fd4, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; peak = cg_read_long_fd(peak_fd); if (peak > MB(30) || peak < 0) goto cleanup; if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) goto cleanup; peak = cg_read_long(memcg, "memory.peak"); if (peak < MB(50)) goto cleanup; /* Make sure everything is back to normal */ peak = cg_read_long_fd(peak_fd); if (peak < MB(50)) goto cleanup; peak = cg_read_long_fd(peak_fd4); if (peak < MB(50)) goto cleanup; fd3_closed = true; if (close(peak_fd3)) goto cleanup; fd4_closed = true; if (close(peak_fd4)) goto cleanup; ret = KSFT_PASS; cleanup: close(peak_fd); if (!fd2_closed) close(peak_fd2); if (!fd3_closed) close(peak_fd3); if (!fd4_closed) close(peak_fd4); cg_destroy(memcg); free(memcg); return ret; } static int 
alloc_pagecache_50M_noexit(const char *cgroup, void *arg) { int fd = (long)arg; int ppid = getppid(); if (alloc_pagecache(fd, MB(50))) return -1; while (getppid() == ppid) sleep(1); return 0; } static int alloc_anon_noexit(const char *cgroup, void *arg) { int ppid = getppid(); size_t size = (unsigned long)arg; char *buf, *ptr; buf = malloc(size); if (buf == NULL) { fprintf(stderr, "malloc() failed\n"); return -1; } for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) *ptr = 0; while (getppid() == ppid) sleep(1); free(buf); return 0; } /* * Wait until processes are killed asynchronously by the OOM killer * If we exceed a timeout, fail. */ static int cg_test_proc_killed(const char *cgroup) { int limit; for (limit = 10; limit > 0; limit--) { if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) return 0; usleep(100000); } return -1; } static bool reclaim_until(const char *memcg, long goal); /* * First, this test creates the following hierarchy: * A memory.min = 0, memory.max = 200M * A/B memory.min = 50M * A/B/C memory.min = 75M, memory.current = 50M * A/B/D memory.min = 25M, memory.current = 50M * A/B/E memory.min = 0, memory.current = 50M * A/B/F memory.min = 500M, memory.current = 0 * * (or memory.low if we test soft protection) * * Usages are pagecache and the test keeps a running * process in every leaf cgroup. * Then it creates A/G and creates a significant * memory pressure in A. * * Then it checks actual memory usages and expects that: * A/B memory.current ~= 50M * A/B/C memory.current ~= 29M * A/B/D memory.current ~= 21M * A/B/E memory.current ~= 0 * A/B/F memory.current = 0 * (for origin of the numbers, see model in memcg_protection.m.) * * After that it tries to allocate more than there is * unprotected memory in A available, and checks that: * a) memory.min protects pagecache even in this case, * b) memory.low allows reclaiming page cache with low events. * * Then we try to reclaim from A/B/C using memory.reclaim until its * usage reaches 10M. 
* This makes sure that: * (a) We ignore the protection of the reclaim target memcg. * (b) The previously calculated emin value (~29M) should be dismissed. */ static int test_memcg_protection(const char *root, bool min) { int ret = KSFT_FAIL, rc; char *parent[3] = {NULL}; char *children[4] = {NULL}; const char *attribute = min ? "memory.min" : "memory.low"; long c[4]; long current; int i, attempts; int fd; fd = get_temp_fd(); if (fd < 0) goto cleanup; parent[0] = cg_name(root, "memcg_test_0"); if (!parent[0]) goto cleanup; parent[1] = cg_name(parent[0], "memcg_test_1"); if (!parent[1]) goto cleanup; parent[2] = cg_name(parent[0], "memcg_test_2"); if (!parent[2]) goto cleanup; if (cg_create(parent[0])) goto cleanup; if (cg_read_long(parent[0], attribute)) { /* No memory.min on older kernels is fine */ if (min) ret = KSFT_SKIP; goto cleanup; } if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) goto cleanup; if (cg_write(parent[0], "memory.max", "200M")) goto cleanup; if (cg_write(parent[0], "memory.swap.max", "0")) goto cleanup; if (cg_create(parent[1])) goto cleanup; if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) goto cleanup; if (cg_create(parent[2])) goto cleanup; for (i = 0; i < ARRAY_SIZE(children); i++) { children[i] = cg_name_indexed(parent[1], "child_memcg", i); if (!children[i]) goto cleanup; if (cg_create(children[i])) goto cleanup; if (i > 2) continue; cg_run_nowait(children[i], alloc_pagecache_50M_noexit, (void *)(long)fd); } if (cg_write(parent[1], attribute, "50M")) goto cleanup; if (cg_write(children[0], attribute, "75M")) goto cleanup; if (cg_write(children[1], attribute, "25M")) goto cleanup; if (cg_write(children[2], attribute, "0")) goto cleanup; if (cg_write(children[3], attribute, "500M")) goto cleanup; attempts = 0; while (!values_close(cg_read_long(parent[1], "memory.current"), MB(150), 3)) { if (attempts++ > 5) break; sleep(1); } if (cg_run(parent[2], alloc_anon, (void *)MB(148))) goto cleanup; if 
(!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) goto cleanup; for (i = 0; i < ARRAY_SIZE(children); i++) c[i] = cg_read_long(children[i], "memory.current"); if (!values_close(c[0], MB(29), 10)) goto cleanup; if (!values_close(c[1], MB(21), 10)) goto cleanup; if (c[3] != 0) goto cleanup; rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); if (min && !rc) goto cleanup; else if (!min && rc) { fprintf(stderr, "memory.low prevents from allocating anon memory\n"); goto cleanup; } current = min ? MB(50) : MB(30); if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) goto cleanup; if (!reclaim_until(children[0], MB(10))) goto cleanup; if (min) { ret = KSFT_PASS; goto cleanup; } for (i = 0; i < ARRAY_SIZE(children); i++) { int no_low_events_index = 1; long low, oom; oom = cg_read_key_long(children[i], "memory.events", "oom "); low = cg_read_key_long(children[i], "memory.events", "low "); if (oom) goto cleanup; if (i <= no_low_events_index && low <= 0) goto cleanup; if (i > no_low_events_index && low) goto cleanup; } ret = KSFT_PASS; cleanup: for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { if (!children[i]) continue; cg_destroy(children[i]); free(children[i]); } for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { if (!parent[i]) continue; cg_destroy(parent[i]); free(parent[i]); } close(fd); return ret; } static int test_memcg_min(const char *root) { return test_memcg_protection(root, true); } static int test_memcg_low(const char *root) { return test_memcg_protection(root, false); } static int alloc_pagecache_max_30M(const char *cgroup, void *arg) { size_t size = MB(50); int ret = -1; long current, high, max; int fd; high = cg_read_long(cgroup, "memory.high"); max = cg_read_long(cgroup, "memory.max"); if (high != MB(30) && max != MB(30)) return -1; fd = get_temp_fd(); if (fd < 0) return -1; if (alloc_pagecache(fd, size)) goto cleanup; current = cg_read_long(cgroup, "memory.current"); if (!values_close(current, MB(30), 5)) goto cleanup; 
ret = 0; cleanup: close(fd); return ret; } /* * This test checks that memory.high limits the amount of * memory which can be consumed by either anonymous memory * or pagecache. */ static int test_memcg_high(const char *root) { int ret = KSFT_FAIL; char *memcg; long high; memcg = cg_name(root, "memcg_test"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; if (cg_read_strcmp(memcg, "memory.high", "max\n")) goto cleanup; if (cg_write(memcg, "memory.swap.max", "0")) goto cleanup; if (cg_write(memcg, "memory.high", "30M")) goto cleanup; if (cg_run(memcg, alloc_anon, (void *)MB(31))) goto cleanup; if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) goto cleanup; if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) goto cleanup; high = cg_read_key_long(memcg, "memory.events", "high "); if (high <= 0) goto cleanup; ret = KSFT_PASS; cleanup: cg_destroy(memcg); free(memcg); return ret; } static int alloc_anon_mlock(const char *cgroup, void *arg) { size_t size = (size_t)arg; void *buf; buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 0, 0); if (buf == MAP_FAILED) return -1; mlock(buf, size); munmap(buf, size); return 0; } /* * This test checks that memory.high is able to throttle big single shot * allocation i.e. large allocation within one kernel entry. 
*/ static int test_memcg_high_sync(const char *root) { int ret = KSFT_FAIL, pid, fd = -1; char *memcg; long pre_high, pre_max; long post_high, post_max; memcg = cg_name(root, "memcg_test"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; pre_high = cg_read_key_long(memcg, "memory.events", "high "); pre_max = cg_read_key_long(memcg, "memory.events", "max "); if (pre_high < 0 || pre_max < 0) goto cleanup; if (cg_write(memcg, "memory.swap.max", "0")) goto cleanup; if (cg_write(memcg, "memory.high", "30M")) goto cleanup; if (cg_write(memcg, "memory.max", "140M")) goto cleanup; fd = memcg_prepare_for_wait(memcg); if (fd < 0) goto cleanup; pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); if (pid < 0) goto cleanup; cg_wait_for(fd); post_high = cg_read_key_long(memcg, "memory.events", "high "); post_max = cg_read_key_long(memcg, "memory.events", "max "); if (post_high < 0 || post_max < 0) goto cleanup; if (pre_high == post_high || pre_max != post_max) goto cleanup; ret = KSFT_PASS; cleanup: if (fd >= 0) close(fd); cg_destroy(memcg); free(memcg); return ret; } /* * This test checks that memory.max limits the amount of * memory which can be consumed by either anonymous memory * or pagecache. 
*/ static int test_memcg_max(const char *root) { int ret = KSFT_FAIL; char *memcg; long current, max; memcg = cg_name(root, "memcg_test"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; if (cg_read_strcmp(memcg, "memory.max", "max\n")) goto cleanup; if (cg_write(memcg, "memory.swap.max", "0")) goto cleanup; if (cg_write(memcg, "memory.max", "30M")) goto cleanup; /* Should be killed by OOM killer */ if (!cg_run(memcg, alloc_anon, (void *)MB(100))) goto cleanup; if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) goto cleanup; current = cg_read_long(memcg, "memory.current"); if (current > MB(30) || !current) goto cleanup; max = cg_read_key_long(memcg, "memory.events", "max "); if (max <= 0) goto cleanup; ret = KSFT_PASS; cleanup: cg_destroy(memcg); free(memcg); return ret; } /* * Reclaim from @memcg until usage reaches @goal by writing to * memory.reclaim. * * This function will return false if the usage is already below the * goal. * * This function assumes that writing to memory.reclaim is the only * source of change in memory.current (no concurrent allocations or * reclaim). * * This function makes sure memory.reclaim is sane. It will return * false if memory.reclaim's error codes do not make sense, even if * the usage goal was satisfied. */ static bool reclaim_until(const char *memcg, long goal) { char buf[64]; int retries, err; long current, to_reclaim; bool reclaimed = false; for (retries = 5; retries > 0; retries--) { current = cg_read_long(memcg, "memory.current"); if (current < goal || values_close(current, goal, 3)) break; /* Did memory.reclaim return 0 incorrectly? */ else if (reclaimed) return false; to_reclaim = current - goal; snprintf(buf, sizeof(buf), "%ld", to_reclaim); err = cg_write(memcg, "memory.reclaim", buf); if (!err) reclaimed = true; else if (err != -EAGAIN) return false; } return reclaimed; } /* * This test checks that memory.reclaim reclaims the given * amount of memory (from both anon and file, if possible). 
*/ static int test_memcg_reclaim(const char *root) { int ret = KSFT_FAIL; int fd = -1; int retries; char *memcg; long current, expected_usage; memcg = cg_name(root, "memcg_test"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; current = cg_read_long(memcg, "memory.current"); if (current != 0) goto cleanup; fd = get_temp_fd(); if (fd < 0) goto cleanup; cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); /* * If swap is enabled, try to reclaim from both anon and file, else try * to reclaim from file only. */ if (is_swap_enabled()) { cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); expected_usage = MB(100); } else expected_usage = MB(50); /* * Wait until current usage reaches the expected usage (or we run out of * retries). */ retries = 5; while (!values_close(cg_read_long(memcg, "memory.current"), expected_usage, 10)) { if (retries--) { sleep(1); continue; } else { fprintf(stderr, "failed to allocate %ld for memcg reclaim test\n", expected_usage); goto cleanup; } } /* * Reclaim until current reaches 30M, this makes sure we hit both anon * and file if swap is enabled. 
*/ if (!reclaim_until(memcg, MB(30))) goto cleanup; ret = KSFT_PASS; cleanup: cg_destroy(memcg); free(memcg); close(fd); return ret; } static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) { long mem_max = (long)arg; size_t size = MB(50); char *buf, *ptr; long mem_current, swap_current; int ret = -1; buf = malloc(size); if (buf == NULL) { fprintf(stderr, "malloc() failed\n"); return -1; } for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) *ptr = 0; mem_current = cg_read_long(cgroup, "memory.current"); if (!mem_current || !values_close(mem_current, mem_max, 3)) goto cleanup; swap_current = cg_read_long(cgroup, "memory.swap.current"); if (!swap_current || !values_close(mem_current + swap_current, size, 3)) goto cleanup; ret = 0; cleanup: free(buf); return ret; } /* * This test checks that memory.swap.max limits the amount of * anonymous memory which can be swapped out. Additionally, it verifies that * memory.swap.peak reflects the high watermark and can be reset. */ static int test_memcg_swap_max_peak(const char *root) { int ret = KSFT_FAIL; char *memcg; long max, peak; struct stat ss; int swap_peak_fd = -1, mem_peak_fd = -1; /* any non-empty string resets */ static const char reset_string[] = "foobarbaz"; if (!is_swap_enabled()) return KSFT_SKIP; memcg = cg_name(root, "memcg_test"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; if (cg_read_long(memcg, "memory.swap.current")) { ret = KSFT_SKIP; goto cleanup; } swap_peak_fd = cg_open(memcg, "memory.swap.peak", O_RDWR | O_APPEND | O_CLOEXEC); if (swap_peak_fd == -1) { if (errno == ENOENT) ret = KSFT_SKIP; goto cleanup; } /* * Before we try to use memory.swap.peak's fd, try to figure out * whether this kernel supports writing to that file in the first * place. 
(by checking the writable bit on the file's st_mode) */ if (fstat(swap_peak_fd, &ss)) goto cleanup; if ((ss.st_mode & S_IWUSR) == 0) { ret = KSFT_SKIP; goto cleanup; } mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); if (mem_peak_fd == -1) goto cleanup; if (cg_read_long(memcg, "memory.swap.peak")) goto cleanup; if (cg_read_long_fd(swap_peak_fd)) goto cleanup; /* switch the swap and mem fds into local-peak tracking mode*/ int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; if (cg_read_long_fd(swap_peak_fd)) goto cleanup; if (cg_read_long(memcg, "memory.peak")) goto cleanup; if (cg_read_long_fd(mem_peak_fd)) goto cleanup; peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; if (cg_read_long_fd(mem_peak_fd)) goto cleanup; if (cg_read_strcmp(memcg, "memory.max", "max\n")) goto cleanup; if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) goto cleanup; if (cg_write(memcg, "memory.swap.max", "30M")) goto cleanup; if (cg_write(memcg, "memory.max", "30M")) goto cleanup; /* Should be killed by OOM killer */ if (!cg_run(memcg, alloc_anon, (void *)MB(100))) goto cleanup; if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) goto cleanup; if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) goto cleanup; peak = cg_read_long(memcg, "memory.peak"); if (peak < MB(29)) goto cleanup; peak = cg_read_long(memcg, "memory.swap.peak"); if (peak < MB(29)) goto cleanup; peak = cg_read_long_fd(mem_peak_fd); if (peak < MB(29)) goto cleanup; peak = cg_read_long_fd(swap_peak_fd); if (peak < MB(29)) goto cleanup; /* * open, reset and close the peak swap on another FD to make sure * multiple extant fds don't corrupt the linked-list */ peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string); if (peak_reset) goto cleanup; peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string); if 
(peak_reset) goto cleanup; /* actually reset on the fds */ peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); if (peak_reset != sizeof(reset_string)) goto cleanup; peak = cg_read_long_fd(swap_peak_fd); if (peak > MB(10)) goto cleanup; /* * The cgroup is now empty, but there may be a page or two associated * with the open FD accounted to it. */ peak = cg_read_long_fd(mem_peak_fd); if (peak > MB(1)) goto cleanup; if (cg_read_long(memcg, "memory.peak") < MB(29)) goto cleanup; if (cg_read_long(memcg, "memory.swap.peak") < MB(29)) goto cleanup; if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) goto cleanup; max = cg_read_key_long(memcg, "memory.events", "max "); if (max <= 0) goto cleanup; peak = cg_read_long(memcg, "memory.peak"); if (peak < MB(29)) goto cleanup; peak = cg_read_long(memcg, "memory.swap.peak"); if (peak < MB(29)) goto cleanup; peak = cg_read_long_fd(mem_peak_fd); if (peak < MB(29)) goto cleanup; peak = cg_read_long_fd(swap_peak_fd); if (peak < MB(19)) goto cleanup; ret = KSFT_PASS; cleanup: if (mem_peak_fd != -1 && close(mem_peak_fd)) ret = KSFT_FAIL; if (swap_peak_fd != -1 && close(swap_peak_fd)) ret = KSFT_FAIL; cg_destroy(memcg); free(memcg); return ret; } /* * This test disables swapping and tries to allocate anonymous memory * up to OOM. Then it checks for oom and oom_kill events in * memory.events. 
*/ static int test_memcg_oom_events(const char *root) { int ret = KSFT_FAIL; char *memcg; memcg = cg_name(root, "memcg_test"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; if (cg_write(memcg, "memory.max", "30M")) goto cleanup; if (cg_write(memcg, "memory.swap.max", "0")) goto cleanup; if (!cg_run(memcg, alloc_anon, (void *)MB(100))) goto cleanup; if (cg_read_strcmp(memcg, "cgroup.procs", "")) goto cleanup; if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) goto cleanup; if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) goto cleanup; ret = KSFT_PASS; cleanup: cg_destroy(memcg); free(memcg); return ret; } struct tcp_server_args { unsigned short port; int ctl[2]; }; static int tcp_server(const char *cgroup, void *arg) { struct tcp_server_args *srv_args = arg; struct sockaddr_in6 saddr = { 0 }; socklen_t slen = sizeof(saddr); int sk, client_sk, ctl_fd, yes = 1, ret = -1; close(srv_args->ctl[0]); ctl_fd = srv_args->ctl[1]; saddr.sin6_family = AF_INET6; saddr.sin6_addr = in6addr_any; saddr.sin6_port = htons(srv_args->port); sk = socket(AF_INET6, SOCK_STREAM, 0); if (sk < 0) return ret; if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) goto cleanup; if (bind(sk, (struct sockaddr *)&saddr, slen)) { write(ctl_fd, &errno, sizeof(errno)); goto cleanup; } if (listen(sk, 1)) goto cleanup; ret = 0; if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { ret = -1; goto cleanup; } client_sk = accept(sk, NULL, NULL); if (client_sk < 0) goto cleanup; ret = -1; for (;;) { uint8_t buf[0x100000]; if (write(client_sk, buf, sizeof(buf)) <= 0) { if (errno == ECONNRESET) ret = 0; break; } } close(client_sk); cleanup: close(sk); return ret; } static int tcp_client(const char *cgroup, unsigned short port) { const char server[] = "localhost"; struct addrinfo *ai; char servport[6]; int retries = 0x10; /* nice round number */ int sk, ret; long allocated; allocated = cg_read_long(cgroup, "memory.current"); snprintf(servport, 
sizeof(servport), "%hd", port); ret = getaddrinfo(server, servport, NULL, &ai); if (ret) return ret; sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (sk < 0) goto free_ainfo; ret = connect(sk, ai->ai_addr, ai->ai_addrlen); if (ret < 0) goto close_sk; ret = KSFT_FAIL; while (retries--) { uint8_t buf[0x100000]; long current, sock; if (read(sk, buf, sizeof(buf)) <= 0) goto close_sk; current = cg_read_long(cgroup, "memory.current"); sock = cg_read_key_long(cgroup, "memory.stat", "sock "); if (current < 0 || sock < 0) goto close_sk; /* exclude the memory not related to socket connection */ if (values_close(current - allocated, sock, 10)) { ret = KSFT_PASS; break; } } close_sk: close(sk); free_ainfo: freeaddrinfo(ai); return ret; } /* * This test checks socket memory accounting. * The test forks a TCP server listens on a random port between 1000 * and 61000. Once it gets a client connection, it starts writing to * its socket. * The TCP client interleaves reads from the socket with check whether * memory.current and memory.stat.sock are similar. 
*/
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Try up to five random ports; only EADDRINUSE triggers a retry. */
	while (bind_retries--) {
		struct tcp_server_args args;
		int got_status;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0) {
			/* No server was started: drop both pipe ends. */
			close(args.ctl[0]);
			close(args.ctl[1]);
			goto cleanup;
		}

		/*
		 * The server reports an int over the pipe: 0 once it is
		 * listening, or the errno of a failed bind(). Close the read
		 * end whether or not the read succeeded so it cannot leak.
		 */
		close(args.ctl[1]);
		got_status = read(args.ctl[0], &err, sizeof(err)) == sizeof(err);
		close(args.ctl[0]);
		if (!got_status)
			goto cleanup;

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	/* Every attempted port was busy: nothing could be tested. */
	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	/* err is reused for the server's wait status from here on. */
	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* With the connection gone, the "sock" counter must read zero. */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	/* Guarded like the other tests in this file: memcg is NULL if cg_name() failed. */
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_events
 * were propagated to the parent level.
*/
static int test_memcg_oom_group_leaf_events(const char *root)
{
	char *parent_cg = cg_name(root, "memcg_test_0");
	char *leaf_cg = cg_name(root, "memcg_test_0/memcg_test_1");
	long leaf_kills, parent_kills;
	int result = KSFT_FAIL;

	if (!parent_cg || !leaf_cg)
		goto out;

	if (cg_create(parent_cg) || cg_create(leaf_cg))
		goto out;

	if (cg_write(parent_cg, "cgroup.subtree_control", "+memory"))
		goto out;

	/* 50M limit, no swap, group OOM enabled on the leaf. */
	if (cg_write(leaf_cg, "memory.max", "50M") ||
	    cg_write(leaf_cg, "memory.swap.max", "0") ||
	    cg_write(leaf_cg, "memory.oom.group", "1"))
		goto out;

	/* Background allocators; their return values are not checked. */
	cg_run_nowait(parent_cg, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(leaf_cg, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(leaf_cg, alloc_anon_noexit, (void *) MB(1));

	/* The 100M allocation in the leaf must not complete successfully. */
	if (cg_run(leaf_cg, alloc_anon, (void *)MB(100)) == 0)
		goto out;

	if (cg_test_proc_killed(leaf_cg))
		goto out;

	leaf_kills = cg_read_key_long(leaf_cg, "memory.events", "oom_kill ");
	if (leaf_kills <= 0)
		goto out;

	parent_kills = cg_read_key_long(parent_cg, "memory.events",
					"oom_kill ");

	/*
	 * Without memory_localevents (the default) the parent also counts
	 * the OOM kills of its children, so it must report at least one.
	 * With memory_localevents it must not have seen any.
	 */
	if (has_localevents) {
		if (parent_kills != 0)
			goto out;
	} else if (parent_kills <= 0) {
		goto out;
	}

	result = KSFT_PASS;

out:
	if (leaf_cg)
		cg_destroy(leaf_cg);
	if (parent_cg)
		cg_destroy(parent_cg);
	free(leaf_cg);
	free(parent_cg);

	return result;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
*/
static int test_memcg_oom_group_parent_events(const char *root)
{
	char *parent_cg = cg_name(root, "memcg_test_0");
	char *leaf_cg = cg_name(root, "memcg_test_0/memcg_test_1");
	int result = KSFT_FAIL;

	if (!parent_cg || !leaf_cg)
		goto out;

	if (cg_create(parent_cg) || cg_create(leaf_cg))
		goto out;

	/* 80M limit, no swap, group OOM enabled on the parent. */
	if (cg_write(parent_cg, "memory.max", "80M") ||
	    cg_write(parent_cg, "memory.swap.max", "0") ||
	    cg_write(parent_cg, "memory.oom.group", "1"))
		goto out;

	/* Background allocators; their return values are not checked. */
	cg_run_nowait(parent_cg, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(leaf_cg, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(leaf_cg, alloc_anon_noexit, (void *) MB(1));

	/* The 100M allocation in the leaf must not complete successfully. */
	if (cg_run(leaf_cg, alloc_anon, (void *)MB(100)) == 0)
		goto out;

	/* Both levels are checked, leaf first. */
	if (cg_test_proc_killed(leaf_cg) || cg_test_proc_killed(parent_cg))
		goto out;

	result = KSFT_PASS;

out:
	if (leaf_cg)
		cg_destroy(leaf_cg);
	if (parent_cg)
		cg_destroy(parent_cg);
	free(leaf_cg);
	free(parent_cg);

	return result;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set.
Then it checks that all * processes were killed except those set with OOM_SCORE_ADJ_MIN */ static int test_memcg_oom_group_score_events(const char *root) { int ret = KSFT_FAIL; char *memcg; int safe_pid; memcg = cg_name(root, "memcg_test_0"); if (!memcg) goto cleanup; if (cg_create(memcg)) goto cleanup; if (cg_write(memcg, "memory.max", "50M")) goto cleanup; if (cg_write(memcg, "memory.swap.max", "0")) goto cleanup; if (cg_write(memcg, "memory.oom.group", "1")) goto cleanup; safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) goto cleanup; cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); if (!cg_run(memcg, alloc_anon, (void *)MB(100))) goto cleanup; if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) goto cleanup; if (kill(safe_pid, SIGKILL)) goto cleanup; ret = KSFT_PASS; cleanup: if (memcg) cg_destroy(memcg); free(memcg); return ret; } #define T(x) { x, #x } struct memcg_test { int (*fn)(const char *root); const char *name; } tests[] = { T(test_memcg_subtree_control), T(test_memcg_current_peak), T(test_memcg_min), T(test_memcg_low), T(test_memcg_high), T(test_memcg_high_sync), T(test_memcg_max), T(test_memcg_reclaim), T(test_memcg_oom_events), T(test_memcg_swap_max_peak), T(test_memcg_sock), T(test_memcg_oom_group_leaf_events), T(test_memcg_oom_group_parent_events), T(test_memcg_oom_group_score_events), }; #undef T int main(int argc, char **argv) { char root[PATH_MAX]; int i, proc_status, ret = EXIT_SUCCESS; if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); /* * Check that memory controller is available: * memory is listed in cgroup.controllers */ if (cg_read_strstr(root, "cgroup.controllers", "memory")) ksft_exit_skip("memory controller isn't available\n"); if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) if (cg_write(root, "cgroup.subtree_control", "+memory")) ksft_exit_skip("Failed to set memory controller\n"); 
proc_status = proc_mount_contains("memory_recursiveprot"); if (proc_status < 0) ksft_exit_skip("Failed to query cgroup mount option\n"); has_recursiveprot = proc_status; proc_status = proc_mount_contains("memory_localevents"); if (proc_status < 0) ksft_exit_skip("Failed to query cgroup mount option\n"); has_localevents = proc_status; for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: ksft_test_result_pass("%s\n", tests[i].name); break; case KSFT_SKIP: ksft_test_result_skip("%s\n", tests[i].name); break; default: ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } return ret; }
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with Cregit http://github.com/cregit/cregit
Version 2.0-RC1