cregit-Linux how code gets into the kernel

Release 4.18 fs/super.c

Directory: fs
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables
 *                                   - filesystem drivers list
 *                                   - mount system call
 *                                   - umount system call
 *                                   - ustat system call
 *
 * GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 *  Added options to /proc/mounts:
 *    Torbjörn Lindh, April 14, 1996.
 *  Added devfs support: Richard Gooch, 13-JAN-1998
 *  Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h>		/* for the emergency remount stuff */
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include "internal.h"

static int thaw_super_locked(struct super_block *sb);

static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);

static char *sb_writers_name[SB_FREEZE_LEVELS] = {

/*
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
 * If that happens we could trigger unregistering the shrinker from within the
 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
 * take a passive reference to the superblock to avoid this from occurring.
 */

static unsigned long super_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct super_block *sb; long fs_objects = 0; long total_objects; long freed = 0; long dentries; long inodes; sb = container_of(shrink, struct super_block, s_shrink); /* * Deadlock avoidance. We may hold various FS locks, and we don't want * to recurse into the FS that called us in clear_inode() and friends.. */ if (!(sc->gfp_mask & __GFP_FS)) return SHRINK_STOP; if (!trylock_super(sb)) return SHRINK_STOP; if (sb->s_op->nr_cached_objects) fs_objects = sb->s_op->nr_cached_objects(sb, sc); inodes = list_lru_shrink_count(&sb->s_inode_lru, sc); dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc); total_objects = dentries + inodes + fs_objects + 1; if (!total_objects) total_objects = 1; /* proportion the scan between the caches */ dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects); /* * prune the dcache first as the icache is pinned by it, then * prune the icache, followed by the filesystem specific caches * * Ensure that we always scan at least one object - memcg kmem * accounting uses this to fully empty the caches. */ sc->nr_to_scan = dentries + 1; freed = prune_dcache_sb(sb, sc); sc->nr_to_scan = inodes + 1; freed += prune_icache_sb(sb, sc); if (fs_objects) { sc->nr_to_scan = fs_objects + 1; freed += sb->s_op->free_cached_objects(sb, sc); } up_read(&sb->s_umount); return freed; }


Dave Chinner19776.95%650.00%
Vladimir Davydov3513.67%325.00%
Glauber de Oliveira Costa103.91%18.33%
Tetsuo Handa93.52%18.33%
Konstantin Khlebnikov51.95%18.33%

static unsigned long super_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct super_block *sb; long total_objects = 0; sb = container_of(shrink, struct super_block, s_shrink); /* * We don't call trylock_super() here as it is a scalability bottleneck, * so we're exposed to partial setup state. The shrinker rwsem does not * protect filesystem operations backing list_lru_shrink_count() or * s_op->nr_cached_objects(). Counts can change between * super_cache_count and super_cache_scan, so we really don't need locks * here. * * However, if we are currently mounting the superblock, the underlying * filesystem might be in a state of partial construction and hence it * is dangerous to access it. trylock_super() uses a SB_BORN check to * avoid this situation, so do the same here. The memory barrier is * matched with the one in mount_fs() as we don't hold locks here. */ if (!(sb->s_flags & SB_BORN)) return 0; smp_rmb(); if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc); total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc); total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc); total_objects = vfs_pressure_ratio(total_objects); return total_objects; }


Dave Chinner11195.69%777.78%
Glauber de Oliveira Costa32.59%111.11%
Vladimir Davydov21.72%111.11%

/*
 * destroy_super_work - final teardown of a superblock
 * @work: the destroy_work member embedded in the superblock
 *
 * Releases every per-cpu rw-semaphore in s_writers and then frees the
 * superblock itself.
 */
static void destroy_super_work(struct work_struct *work)
{
	struct super_block *sb = container_of(work, struct super_block,
					      destroy_work);
	int level = 0;

	while (level < SB_FREEZE_LEVELS) {
		percpu_free_rwsem(&sb->s_writers.rw_sem[level]);
		level++;
	}

	kfree(sb);
}


Jan Kara3050.00%125.00%
Oleg Nesterov2643.33%250.00%
Al Viro46.67%125.00%

/*
 * destroy_super_rcu - RCU callback that hands teardown over to a workqueue
 * @head: the rcu member embedded in the superblock
 *
 * Initialises the superblock's destroy_work with destroy_super_work() and
 * schedules it, so the actual freeing runs from a work item.
 */
static void destroy_super_rcu(struct rcu_head *head)
{
	struct super_block *sb = container_of(head, struct super_block, rcu);
	struct work_struct *work = &sb->destroy_work;

	INIT_WORK(work, destroy_super_work);
	schedule_work(work);
}


Oleg Nesterov44100.00%1100.00%

/* Free a superblock that has never been seen by anyone */
static void destroy_unused_super(struct super_block *s) { if (!s) return; up_write(&s->s_umount); list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); free_prealloced_shrinker(&s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); }


Oleg Nesterov2431.17%116.67%
Al Viro2025.97%233.33%
Jan Kara1823.38%116.67%
Tetsuo Handa810.39%116.67%
Eric W. Biedermann79.09%116.67%

/** * alloc_super - create new superblock * @type: filesystem type superblock should belong to * @flags: the mount flags * @user_ns: User namespace for the super_block * * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */
static struct super_block *alloc_super(struct file_system_type *type, int flags, struct user_namespace *user_ns) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; int i; if (!s) return NULL; INIT_LIST_HEAD(&s->s_mounts); s->s_user_ns = get_user_ns(user_ns); init_rwsem(&s->s_umount); lockdep_set_class(&s->s_umount, &type->s_umount_key); /* * sget() can have s_umount recursion. * * When it cannot find a suitable sb, it allocates a new * one (this one), and tries again to find a suitable old * one. * * In case that succeeds, it will acquire the s_umount * lock of the old one. Since these are clearly distrinct * locks, and this object isn't exposed yet, there's no * risk of deadlocks. * * Annotate this by putting this lock in a different * subclass. */ down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); if (security_sb_alloc(s)) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { if (__percpu_init_rwsem(&s->s_writers.rw_sem[i], sb_writers_name[i], &type->s_writers_key[i])) goto fail; } init_waitqueue_head(&s->s_writers.wait_unfrozen); s->s_bdi = &noop_backing_dev_info; s->s_flags = flags; if (s->s_user_ns != &init_user_ns) s->s_iflags |= SB_I_NODEV; INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_roots); mutex_init(&s->s_sync_lock); INIT_LIST_HEAD(&s->s_inodes); spin_lock_init(&s->s_inode_list_lock); INIT_LIST_HEAD(&s->s_inodes_wb); spin_lock_init(&s->s_inode_wblist_lock); if (list_lru_init_memcg(&s->s_dentry_lru)) goto fail; if (list_lru_init_memcg(&s->s_inode_lru)) goto fail; s->s_count = 1; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); init_rwsem(&s->s_dquot.dqio_sem); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; s->cleancache_poolid = CLEANCACHE_NO_POOL; s->s_shrink.seeks = DEFAULT_SEEKS; s->s_shrink.scan_objects = super_cache_scan; s->s_shrink.count_objects = 
super_cache_count; s->s_shrink.batch = 1024; s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; if (prealloc_shrinker(&s->s_shrink)) goto fail; return s; fail: destroy_unused_super(s); return NULL; }


Al Viro9522.67%510.64%
Dave Chinner7818.62%817.02%
Linus Torvalds7517.90%36.38%
Eric W. Biedermann296.92%24.26%
Tetsuo Handa133.10%12.13%
Roland Dreier133.10%12.13%
Vladimir Davydov133.10%36.38%
Glauber de Oliveira Costa102.39%12.13%
Andrew Morton102.39%12.13%
William Lee Irwin III81.91%12.13%
Jan Kara81.91%36.38%
Stephen D. Smalley81.91%12.13%
David Howells71.67%12.13%
Neil Brown71.67%24.26%
Oleg Nesterov61.43%12.13%
Kentaro Makita61.43%12.13%
Christoph Hellwig61.43%24.26%
Andi Kleen61.43%12.13%
Dan Magenheimer51.19%12.13%
Ingo Molnar40.95%12.13%
Jens Axboe40.95%12.13%
Miklos Szeredi20.48%12.13%
Arjan van de Ven20.48%12.13%
Greg Kroah-Hartman10.24%12.13%
Nicholas Piggin10.24%12.13%
Alexey Dobriyan10.24%12.13%
Oliver Neukum10.24%12.13%

/* Superblock refcounting */ /* * Drop a superblock's refcount. The caller must hold sb_lock. */
static void __put_super(struct super_block *s) { if (!--s->s_count) { list_del_init(&s->s_list); WARN_ON(s->s_dentry_lru.node); WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); } }


Al Viro6877.27%466.67%
Andrew Morton1820.45%116.67%
Kirill Korotaev22.27%116.67%

/**
 *	put_super	-	drop a temporary reference to superblock
 *	@sb: superblock in question
 *
 *	Locked wrapper around __put_super(): takes sb_lock, drops the
 *	temporary reference (which frees the superblock if no references
 *	are left) and releases sb_lock again.
 */
static void put_super(struct super_block *sb)
{
	spin_lock(&sb_lock);
	__put_super(sb);
	spin_unlock(&sb_lock);
}


Linus Torvalds1864.29%240.00%
Al Viro725.00%240.00%
Andrew Morton310.71%120.00%

/**
 *	deactivate_locked_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Drops one active reference.  If others remain, only the s_umount
 *	write lock is released.  If this was the last one, the shrinker is
 *	unregistered, the filesystem's ->kill_sb() is called to shut the
 *	superblock down, the LRU lists are destroyed, and the filesystem
 *	type reference and a temporary superblock reference are dropped.
 *
 *	Caller holds exclusive lock on superblock; this function does not
 *	release it itself on the final-reference path.
 */
void deactivate_locked_super(struct super_block *s)
{
	struct file_system_type *fs = s->s_type;

	if (!atomic_dec_and_test(&s->s_active)) {
		/* Still active users left; just unlock. */
		up_write(&s->s_umount);
		return;
	}

	cleancache_invalidate_fs(s);
	unregister_shrinker(&s->s_shrink);
	fs->kill_sb(s);

	/*
	 * list_lru_destroy() may sleep, so it cannot be left to put_super(),
	 * which runs under the sb_lock spinlock.  Tear the LRU lists down
	 * here instead.
	 */
	list_lru_destroy(&s->s_dentry_lru);
	list_lru_destroy(&s->s_inode_lru);

	put_filesystem(fs);
	put_super(s);
}


Al Viro4044.94%327.27%
Vladimir Davydov1617.98%19.09%
Linus Torvalds1213.48%327.27%
Dave Chinner88.99%19.09%
David Chinner88.99%19.09%
Dan Magenheimer55.62%218.18%

EXPORT_SYMBOL(deactivate_locked_super);

/**
 *	deactivate_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Variant of deactivate_locked_super() for callers that do *not* hold
 *	s_umount.  The reference is dropped locklessly unless it is the last
 *	one; in that case s_umount is taken for writing first and the drop
 *	is done by deactivate_locked_super().
 */
void deactivate_super(struct super_block *s)
{
	/* Fast path: decrement unless the count is exactly 1. */
	if (atomic_add_unless(&s->s_active, -1, 1))
		return;

	down_write(&s->s_umount);
	deactivate_locked_super(s);
}


Al Viro41100.00%2100.00%

EXPORT_SYMBOL(deactivate_super);

/**
 *	grab_super - acquire an active reference
 *	@s: reference we are trying to make active
 *
 *	Used when a superblock has just been found in super_blocks or
 *	fs_type->fs_supers and the caller wants a full-blown active
 *	reference to it.  Called with sb_lock held; sb_lock is dropped.
 *
 *	Returns 1 on success, with s_umount held for writing and s_active
 *	incremented.  Returns 0, with nothing held, if the superblock was
 *	not SB_BORN or its active count had already dropped to zero.
 *
 *	Note that this is only called for superblocks not in rundown mode
 *	(== ones still on ->fs_supers of their type), so the increment of
 *	->s_count is OK here.
 */
static int grab_super(struct super_block *s) __releases(sb_lock)
{
	int grabbed;

	/* Pin the structure with a temporary reference while we sleep. */
	s->s_count++;
	spin_unlock(&sb_lock);

	down_write(&s->s_umount);
	grabbed = (s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active);
	if (!grabbed)
		up_write(&s->s_umount);

	/* The temporary reference is no longer needed either way. */
	put_super(s);
	return grabbed;
}


Linus Torvalds4152.56%120.00%
Al Viro3241.03%240.00%
Josh Triplett45.13%120.00%
David Howells11.28%120.00%

/*
 *	trylock_super - try to grab ->s_umount shared
 *	@sb: reference we are trying to grab
 *
 *	Try to prevent fs shutdown.  Meant for places that cannot take an
 *	active reference but still need the filesystem to stay up while
 *	they work on it.
 *
 *	Returns true with s_umount held for reading; the caller must drop
 *	the lock when done.  Returns false, with nothing held, if the lock
 *	could not be taken without blocking, or if the superblock is
 *	unhashed from s_instances, has no root, or is not SB_BORN.
 *
 *	Note that unlike get_super() this one does *not* bump ->s_count.
 *	That is safe only because a trylock is used instead of down_read();
 *	a few callers can live with that, but this is very much not a
 *	general-purpose interface.
 */
bool trylock_super(struct super_block *sb)
{
	if (!down_read_trylock(&sb->s_umount))
		return false;

	if (hlist_unhashed(&sb->s_instances) || !sb->s_root ||
	    !(sb->s_flags & SB_BORN)) {
		up_read(&sb->s_umount);
		return false;
	}

	return true;
}


Dave Chinner4474.58%125.00%
Konstantin Khlebnikov711.86%125.00%
Al Viro711.86%125.00%
David Howells11.69%125.00%

/** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill * * generic_shutdown_super() does all fs-independent work on superblock * shutdown. Typical ->kill_sb() should pick all fs-specific objects * that need destruction out of superblock, call generic_shutdown_super() * and release aforementioned objects. Note: dentries and inodes _are_ * taken care of and do not need specific handling. * * Upon calling this function, the filesystem may no longer alter or * rearrange the set of dentries belonging to this super_block, nor may it * change the attachments of dentries to inodes. */
void generic_shutdown_super(struct super_block *sb) { const struct super_operations *sop = sb->s_op; if (sb->s_root) { shrink_dcache_for_umount(sb); sync_filesystem(sb); sb->s_flags &= ~SB_ACTIVE; fsnotify_unmount_inodes(sb); cgroup_writeback_umount(); evict_inodes(sb); if (sb->s_dio_done_wq) { destroy_workqueue(sb->s_dio_done_wq); sb->s_dio_done_wq = NULL; } if (sop->put_super) sop->put_super(sb); if (!list_empty(&sb->s_inodes)) { printk("VFS: Busy inodes after unmount of %s. " "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } } spin_lock(&sb_lock); /* should be initialized for __put_super_and_need_restart() */ hlist_del_init(&sb->s_instances); spin_unlock(&sb_lock); up_write(&sb->s_umount); if (sb->s_bdi != &noop_backing_dev_info) { bdi_put(sb->s_bdi); sb->s_bdi = &noop_backing_dev_info; } }


Al Viro10461.54%426.67%
Jan Kara2615.38%320.00%
Christoph Hellwig2112.43%16.67%
Dave Jones52.96%16.67%
David Howells42.37%213.33%
Neil Brown42.37%16.67%
Tejun Heo31.78%16.67%
Kirill Korotaev10.59%16.67%
Josef 'Jeff' Sipek10.59%16.67%

EXPORT_SYMBOL(generic_shutdown_super); /** * sget_userns - find or create a superblock * @type: filesystem type superblock should belong to * @test: comparison callback * @set: setup callback * @flags: mount flags * @user_ns: User namespace for the super_block * @data: argument to each of them */
struct super_block *sget_userns(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, struct user_namespace *user_ns, void *data) { struct super_block *s = NULL; struct super_block *old; int err; if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !(type->fs_flags & FS_USERNS_MOUNT) && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); retry: spin_lock(&sb_lock); if (test) { hlist_for_each_entry(old, &type->fs_supers, s_instances) { if (!test(old, data)) continue; if (user_ns != old->s_user_ns) { spin_unlock(&sb_lock); destroy_unused_super(s); return ERR_PTR(-EBUSY); } if (!grab_super(old)) goto retry; destroy_unused_super(s); return old; } } if (!s) { spin_unlock(&sb_lock); s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; } err = set(s, data); if (err) { spin_unlock(&sb_lock); destroy_unused_super(s); return ERR_PTR(err); } s->s_type = type; strlcpy(s->s_id, type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); register_shrinker_prepared(&s->s_shrink); return s; }


Al Viro20863.03%529.41%
Eric W. Biedermann7221.82%317.65%
Andrew Morton185.45%15.88%
David Howells82.42%211.76%
Dave Chinner72.12%15.88%
Matthias Kaehlcke72.12%15.88%
Li Zefan41.21%15.88%
Ingo Molnar30.91%15.88%
Kirill Korotaev20.61%15.88%
Tetsuo Handa10.30%15.88%

EXPORT_SYMBOL(sget_userns); /** * sget - find or create a superblock * @type: filesystem type superblock should belong to * @test: comparison callback * @set: setup callback * @flags: mount flags * @data: argument to each of them */
struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data) { struct user_namespace *user_ns = current_user_ns(); /* We don't yet pass the user namespace of the parent * mount through to here so always use &init_user_ns * until that changes. */ if (flags & SB_SUBMOUNT) user_ns = &init_user_ns; /* Ensure the requestor has permissions over the target filesystem */ if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); return sget_userns(type, test, set, flags, user_ns, data); }


Eric W. Biedermann10997.32%266.67%
David Howells32.68%133.33%

/**
 *	drop_super	-	release a superblock held shared
 *	@sb: superblock whose read lock and temporary reference are dropped
 *
 *	Releases the s_umount read lock and then drops one temporary
 *	reference through put_super().  This undoes exactly what
 *	get_super() leaves held on the superblock it returns.
 */
void drop_super(struct super_block *sb)
{
	struct rw_semaphore *umount = &sb->s_umount;

	up_read(umount);
	put_super(sb);
}


Linus Torvalds23100.00%4100.00%

/**
 *	drop_super_exclusive	-	release a superblock held exclusively
 *	@sb: superblock whose write lock and temporary reference are dropped
 *
 *	Releases the s_umount write lock and then drops one temporary
 *	reference through put_super().  This undoes what __get_super()
 *	leaves held when it is called with @excl set.
 */
void drop_super_exclusive(struct super_block *sb)
{
	struct rw_semaphore *umount = &sb->s_umount;

	up_write(umount);
	put_super(sb);
}


Jan Kara23100.00%1100.00%

/*
 * __iterate_supers - call a function for every hashed superblock
 * @f: function to call; runs without sb_lock and without s_umount held
 *
 * Walks super_blocks, skipping entries already unhashed from s_instances.
 * Each visited superblock is pinned via s_count while sb_lock is dropped
 * around the call to @f.
 */
static void __iterate_supers(void (*f)(struct super_block *))
{
	struct super_block *sb;
	struct super_block *prev = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;

		sb->s_count++;
		spin_unlock(&sb_lock);

		f(sb);

		spin_lock(&sb_lock);
		/*
		 * The current entry stays pinned until the walk has moved
		 * past it; only the previously visited one is released.
		 */
		if (prev)
			__put_super(prev);
		prev = sb;
	}
	if (prev)
		__put_super(prev);
	spin_unlock(&sb_lock);
}


Mateusz Guzik104100.00%1100.00%

/**
 *	iterate_supers - call function for all active superblocks
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.  The superblock is locked by
 *	holding s_umount for reading, and @f is only called when the
 *	superblock has a root and is SB_BORN.
 */
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb;
	struct super_block *prev = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;

		/* Pin the entry so sb_lock can be dropped while we sleep. */
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & SB_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		/* Release the previous entry only after moving past it. */
		if (prev)
			__put_super(prev);
		prev = sb;
	}
	if (prev)
		__put_super(prev);
	spin_unlock(&sb_lock);
}


Al Viro13897.18%466.67%
Nicholas Piggin32.11%116.67%
David Howells10.70%116.67%

/**
 *	iterate_supers_type - call function for superblocks of given type
 *	@type: fs type
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the type's fs_supers list and calls given function, passing it
 *	locked superblock and given argument.  The superblock is locked by
 *	holding s_umount for reading, and @f is only called when the
 *	superblock has a root and is SB_BORN.
 */
void iterate_supers_type(struct file_system_type *type,
	void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb;
	struct super_block *prev = NULL;

	spin_lock(&sb_lock);
	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
		/* Pin the entry so sb_lock can be dropped while we sleep. */
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & SB_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		/* Release the previous entry only after moving past it. */
		if (prev)
			__put_super(prev);
		prev = sb;
	}
	if (prev)
		__put_super(prev);
	spin_unlock(&sb_lock);
}


Al Viro13599.26%375.00%
David Howells10.74%125.00%

/*
 * __get_super - find the live superblock sitting on a block device
 * @bdev: device to look up; NULL yields NULL
 * @excl: take s_umount for writing if true, for reading otherwise
 *
 * Returns the matching superblock with one temporary (s_count) reference
 * and s_umount held in the requested mode, or NULL if no hashed
 * superblock with a root and SB_BORN set uses @bdev.
 */
static struct super_block *__get_super(struct block_device *bdev, bool excl)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

	spin_lock(&sb_lock);
again:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances) || sb->s_bdev != bdev)
			continue;

		/* Pin the candidate so sb_lock can be dropped while we sleep. */
		sb->s_count++;
		spin_unlock(&sb_lock);

		if (excl)
			down_write(&sb->s_umount);
		else
			down_read(&sb->s_umount);

		/* still alive? */
		if (sb->s_root && (sb->s_flags & SB_BORN))
			return sb;

		if (excl)
			up_write(&sb->s_umount);
		else
			up_read(&sb->s_umount);

		/* nope, got unmounted: unpin it and walk the list afresh */
		spin_lock(&sb_lock);
		__put_super(sb);
		goto again;
	}
	spin_unlock(&sb_lock);
	return NULL;
}


Al Viro7243.64%535.71%
Jan Kara3320.00%17.14%
Kirill Korotaev2716.36%17.14%
Linus Torvalds2414.55%428.57%
Linus Torvalds (pre-git)84.85%214.29%
David Howells10.61%17.14%

/** * get_super - get the superblock of a device * @bdev: device to get the superblock for * * Scans the superblock list and finds the superblock of the file system * mounted on the device given. %NULL is returned if no match is found. */
struct super_block *get_super(struct block_device *bdev) { return __get_super(bdev, false); }


Jan Kara20100.00%1100.00%

static struct super_block *__get_super_thawed(struct block_device *bdev, bool excl) { while (1) { struct super_block *s = __get_super(bdev, excl); if (!s || s->s_writers.frozen == SB_UNFROZEN) return s; if (!excl) up_read(&s->s_umount); else up_write(&s