cregit-Linux how code gets into the kernel

Release 4.7 fs/logfs/logfs_abi.h

Directory: fs/logfs
/*
 * fs/logfs/logfs_abi.h
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Public header for logfs.
 */
#ifndef FS_LOGFS_LOGFS_ABI_H

#define FS_LOGFS_LOGFS_ABI_H

/* For out-of-kernel compiles */
#ifndef BUILD_BUG_ON

#define BUILD_BUG_ON(condition) 
/**/
#endif


#define SIZE_CHECK(type, size)					\
static inline void check_##type(void)                           \
{                                                               \
        BUILD_BUG_ON(sizeof(struct type) != (size));            \
}

/*
 * Throughout the logfs code, we're constantly dealing with blocks at
 * various positions or offsets.  To remove confusion, we stricly
 * distinguish between a "position" - the logical position within a
 * file and an "offset" - the physical location within the device.
 *
 * Any usage of the term offset for a logical location or position for
 * a physical one is a bug and should get fixed.
 */

/*
 * Block are allocated in one of several segments depending on their
 * level.  The following levels are used:
 *  0   - regular data block
 *  1   - i1 indirect blocks
 *  2   - i2 indirect blocks
 *  3   - i3 indirect blocks
 *  4   - i4 indirect blocks
 *  5   - i5 indirect blocks
 *  6   - ifile data blocks
 *  7   - ifile i1 indirect blocks
 *  8   - ifile i2 indirect blocks
 *  9   - ifile i3 indirect blocks
 * 10   - ifile i4 indirect blocks
 * 11   - ifile i5 indirect blocks
 * Potential levels to be used in the future:
 * 12   - gc recycled blocks, long-lived data
 * 13   - replacement blocks, short-lived data
 *
 * Levels 1-11 are necessary for robust gc operations and help separate
 * short-lived metadata from longer-lived file data.  In the future,
 * file data should get separated into several segments based on simple
 * heuristics.  Old data recycled during gc operation is expected to be
 * long-lived.  New data is of uncertain life expectancy.  New data
 * used to replace older blocks in existing files is expected to be
 * short-lived.
 */


/* Magic numbers.  64bit for superblock, 32bit for statfs f_type */

#define LOGFS_MAGIC		0x7a3a8e5cb9d5bf67ull

#define LOGFS_MAGIC_U32		0xc97e8168u

/*
 * Various blocksize related macros.  Blocksize is currently fixed at 4KiB.
 * Sooner or later that should become configurable and the macros replaced
 * by something superblock-dependent.  Pointers in indirect blocks are and
 * will remain 64bit.
 *
 * LOGFS_BLOCKSIZE      - self-explaining
 * LOGFS_BLOCK_FACTOR   - number of pointers per indirect block
 * LOGFS_BLOCK_BITS     - log2 of LOGFS_BLOCK_FACTOR, used for shifts
 */

#define LOGFS_BLOCKSIZE		(4096ull)

#define LOGFS_BLOCK_FACTOR	(LOGFS_BLOCKSIZE / sizeof(u64))

#define LOGFS_BLOCK_BITS	(9)

/*
 * Number of blocks at various levels of indirection.  There are 16 direct
 * block pointers plus a single indirect pointer.
 */

#define I0_BLOCKS		(16)

#define I1_BLOCKS		LOGFS_BLOCK_FACTOR

#define I2_BLOCKS		(LOGFS_BLOCK_FACTOR * I1_BLOCKS)

#define I3_BLOCKS		(LOGFS_BLOCK_FACTOR * I2_BLOCKS)

#define I4_BLOCKS		(LOGFS_BLOCK_FACTOR * I3_BLOCKS)

#define I5_BLOCKS		(LOGFS_BLOCK_FACTOR * I4_BLOCKS)


#define INDIRECT_INDEX		I0_BLOCKS

#define LOGFS_EMBEDDED_FIELDS	(I0_BLOCKS + 1)

/*
 * Sizes at which files require another level of indirection.  Files smaller
 * than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself,
 * similar like ext2 fast symlinks.
 *
 * Data at a position smaller than LOGFS_I0_SIZE is accessed through the
 * direct pointers, else through the 1x indirect pointer and so forth.
 */

#define LOGFS_EMBEDDED_SIZE	(LOGFS_EMBEDDED_FIELDS * sizeof(u64))

#define LOGFS_I0_SIZE		(I0_BLOCKS * LOGFS_BLOCKSIZE)

#define LOGFS_I1_SIZE		(I1_BLOCKS * LOGFS_BLOCKSIZE)

#define LOGFS_I2_SIZE		(I2_BLOCKS * LOGFS_BLOCKSIZE)

#define LOGFS_I3_SIZE		(I3_BLOCKS * LOGFS_BLOCKSIZE)

#define LOGFS_I4_SIZE		(I4_BLOCKS * LOGFS_BLOCKSIZE)

#define LOGFS_I5_SIZE		(I5_BLOCKS * LOGFS_BLOCKSIZE)

/*
 * Each indirect block pointer must have this flag set, if all block pointers
 * behind it are set, i.e. there is no hole hidden in the shadow of this
 * indirect block pointer.
 */

#define LOGFS_FULLY_POPULATED (1ULL << 63)

#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED)

/*
 * LogFS needs to separate data into levels.  Each level is defined as the
 * maximal possible distance from the master inode (inode of the inode file).
 * Data blocks reside on level 0, 1x indirect block on level 1, etc.
 * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
 * This effort is necessary to guarantee garbage collection to always make
 * progress.
 *
 * LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks,
 * LOGFS_MAX_LEVELS is one more for the actual data level of a file.  It is
 * the maximal number of levels for one file.
 * LOGFS_NO_AREAS is twice that, as the inode file and regular files are
 * effectively stacked on top of each other.
 */

#define LOGFS_MAX_INDIRECT	(5)

#define LOGFS_MAX_LEVELS	(LOGFS_MAX_INDIRECT + 1)

#define LOGFS_NO_AREAS		(2 * LOGFS_MAX_LEVELS)

/* Maximum size of filenames */

#define LOGFS_MAX_NAMELEN	(255)

/* Number of segments in the primary journal. */

#define LOGFS_JOURNAL_SEGS	(16)

/* Maximum number of free/erased/etc. segments in journal entries */

#define MAX_CACHED_SEGS		(64)


/*
 * LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store,
 * LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including
 * its header,
 * LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for
 * its segment header and the padded space at the end when no further objects
 * fit.
 */

#define LOGFS_OBJECT_HEADERSIZE	(0x1c)

#define LOGFS_SEGMENT_HEADERSIZE (0x18)

#define LOGFS_MAX_OBJECTSIZE	(LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)

#define LOGFS_SEGMENT_RESERVE	\
	(LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)

/*
 * Segment types:
 * SEG_SUPER    - Data or indirect block
 * SEG_JOURNAL  - Inode
 * SEG_OSTORE   - Dentry
 */
enum {
	
SEG_SUPER	= 0x01,
	
SEG_JOURNAL	= 0x02,
	
SEG_OSTORE	= 0x03,
};

/**
 * struct logfs_segment_header - per-segment header in the ostore
 *
 * @crc:                        crc32 of header (there is no data)
 * @pad:                        unused, must be 0
 * @type:                       segment type, see above
 * @level:                      GC level for all objects in this segment
 * @segno:                      segment number
 * @ec:                         erase count for this segment
 * @gec:                        global erase count at time of writing
 */

struct logfs_segment_header {
	
__be32	crc;
	
__be16	pad;
	
__u8	type;
	
__u8	level;
	
__be32	segno;
	
__be32	ec;
	
__be64	gec;
};

SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);


#define LOGFS_FEATURES_INCOMPAT		(0ull)

#define LOGFS_FEATURES_RO_COMPAT	(0ull)

#define LOGFS_FEATURES_COMPAT		(0ull)

/**
 * struct logfs_disk_super - on-medium superblock
 *
 * @ds_magic:                   magic number, must equal LOGFS_MAGIC
 * @ds_crc:                     crc32 of structure starting with the next field
 * @ds_ifile_levels:            maximum number of levels for ifile
 * @ds_iblock_levels:           maximum number of levels for regular files
 * @ds_data_levels:             number of separate levels for data
 * @pad0:                       reserved, must be 0
 * @ds_feature_incompat:        incompatible filesystem features
 * @ds_feature_ro_compat:       read-only compatible filesystem features
 * @ds_feature_compat:          compatible filesystem features
 * @ds_flags:                   flags
 * @ds_segment_shift:           log2 of segment size
 * @ds_block_shift:             log2 of block size
 * @ds_write_shift:             log2 of write size
 * @pad1:                       reserved, must be 0
 * @ds_journal_seg:             segments used by primary journal
 * @ds_root_reserve:            bytes reserved for the superuser
 * @ds_speed_reserve:           bytes reserved to speed up GC
 * @ds_bad_seg_reserve:         number of segments reserved to handle bad blocks
 * @pad2:                       reserved, must be 0
 * @pad3:                       reserved, must be 0
 *
 * Contains only read-only fields.  Read-write fields like the amount of used
 * space is tracked in the dynamic superblock, which is stored in the journal.
 */

struct logfs_disk_super {
	
struct logfs_segment_header ds_sh;
	
__be64	ds_magic;

	
__be32	ds_crc;
	
__u8	ds_ifile_levels;
	
__u8	ds_iblock_levels;
	
__u8	ds_data_levels;
	
__u8	ds_segment_shift;
	
__u8	ds_block_shift;
	
__u8	ds_write_shift;
	
__u8	pad0[6];

	
__be64	ds_filesystem_size;
	
__be32	ds_segment_size;
	
__be32  ds_bad_seg_reserve;

	
__be64	ds_feature_incompat;
	
__be64	ds_feature_ro_compat;

	
__be64	ds_feature_compat;
	
__be64	ds_feature_flags;

	
__be64	ds_root_reserve;
	
__be64  ds_speed_reserve;

	
__be32	ds_journal_seg[LOGFS_JOURNAL_SEGS];

	
__be64	ds_super_ofs[2];
	
__be64	pad3[8];
};

SIZE_CHECK(logfs_disk_super, 256);

/*
 * Object types:
 * OBJ_BLOCK    - Data or indirect block
 * OBJ_INODE    - Inode
 * OBJ_DENTRY   - Dentry
 */
enum {
	
OBJ_BLOCK	= 0x04,
	
OBJ_INODE	= 0x05,
	
OBJ_DENTRY	= 0x06,
};

/**
 * struct logfs_object_header - per-object header in the ostore
 *
 * @crc:                        crc32 of header, excluding data_crc
 * @len:                        length of data
 * @type:                       object type, see above
 * @compr:                      compression type
 * @ino:                        inode number
 * @bix:                        block index
 * @data_crc:                   crc32 of payload
 */

struct logfs_object_header {
	
__be32	crc;
	
__be16	len;
	
__u8	type;
	
__u8	compr;
	
__be64	ino;
	
__be64	bix;
	
__be32	data_crc;
} __attribute__((packed));

SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);

/*
 * Reserved inode numbers:
 * LOGFS_INO_MASTER     - master inode (for inode file)
 * LOGFS_INO_ROOT       - root directory
 * LOGFS_INO_SEGFILE    - per-segment used bytes and erase count
 */
enum {
	
LOGFS_INO_MAPPING	= 0x00,
	
LOGFS_INO_MASTER	= 0x01,
	
LOGFS_INO_ROOT		= 0x02,
	
LOGFS_INO_SEGFILE	= 0x03,
	
LOGFS_RESERVED_INOS	= 0x10,
};

/*
 * Inode flags.  High bits should never be written to the medium.  They are
 * reserved for in-memory usage.
 * Low bits should either remain in sync with the corresponding FS_*_FL or
 * reuse slots that obviously don't make sense for logfs.
 *
 * LOGFS_IF_DIRTY       Inode must be written back
 * LOGFS_IF_ZOMBIE      Inode has been deleted
 * LOGFS_IF_STILLBORN   -ENOSPC happened when creating inode
 */

#define LOGFS_IF_COMPRESSED	0x00000004 
/* == FS_COMPR_FL */

#define LOGFS_IF_DIRTY		0x20000000

#define LOGFS_IF_ZOMBIE		0x40000000

#define LOGFS_IF_STILLBORN	0x80000000

/* Flags available to chattr */

#define LOGFS_FL_USER_VISIBLE	(LOGFS_IF_COMPRESSED)

#define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)
/* Flags inherited from parent directory on file/directory creation */

#define LOGFS_FL_INHERITED	(LOGFS_IF_COMPRESSED)

/**
 * struct logfs_disk_inode - on-medium inode
 *
 * @di_mode:                    file mode
 * @di_pad:                     reserved, must be 0
 * @di_flags:                   inode flags, see above
 * @di_uid:                     user id
 * @di_gid:                     group id
 * @di_ctime:                   change time
 * @di_mtime:                   modify time
 * @di_refcount:                reference count (aka nlink or link count)
 * @di_generation:              inode generation, for nfs
 * @di_used_bytes:              number of bytes used
 * @di_size:                    file size
 * @di_data:                    data pointers
 */

struct logfs_disk_inode {
	
__be16	di_mode;
	
__u8	di_height;
	
__u8	di_pad;
	
__be32	di_flags;
	
__be32	di_uid;
	
__be32	di_gid;

	
__be64	di_ctime;
	
__be64	di_mtime;

	
__be64	di_atime;
	
__be32	di_refcount;
	
__be32	di_generation;

	
__be64	di_used_bytes;
	
__be64	di_size;

	
__be64	di_data[LOGFS_EMBEDDED_FIELDS];
};

SIZE_CHECK(logfs_disk_inode, 200);


#define INODE_POINTER_OFS \
	(offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64))

#define INODE_USED_OFS \
	(offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64))

#define INODE_SIZE_OFS \
	(offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64))

#define INODE_HEIGHT_OFS	(0)

/**
 * struct logfs_disk_dentry - on-medium dentry structure
 *
 * @ino:                        inode number
 * @namelen:                    length of file name
 * @type:                       file type, identical to bits 12..15 of mode
 * @name:                       file name
 */
/* FIXME: add 6 bytes of padding to remove the __packed */

struct logfs_disk_dentry {
	
__be64	ino;
	
__be16	namelen;
	
__u8	type;
	
__u8	name[LOGFS_MAX_NAMELEN];
} __attribute__((packed));

SIZE_CHECK(logfs_disk_dentry, 266);


#define RESERVED		0xffffffff

#define BADSEG			0xffffffff
/**
 * struct logfs_segment_entry - segment file entry
 *
 * @ec_level:                   erase count and level
 * @valid:                      number of valid bytes
 *
 * Segment file contains one entry for every segment.  ec_level contains the
 * erasecount in the upper 28 bits and the level in the lower 4 bits.  An
 * ec_level of BADSEG (-1) identifies bad segments.  valid contains the number
 * of valid bytes or RESERVED (-1 again) if the segment is used for either the
 * superblock or the journal, or when the segment is bad.
 */

struct logfs_segment_entry {
	
__be32	ec_level;
	
__be32	valid;
};

SIZE_CHECK(logfs_segment_entry, 8);

/**
 * struct logfs_journal_header - header for journal entries (JEs)
 *
 * @h_crc:                      crc32 of journal entry
 * @h_len:                      length of compressed journal entry,
 *                              not including header
 * @h_datalen:                  length of uncompressed data
 * @h_type:                     JE type
 * @h_compr:                    compression type
 * @h_pad:                      reserved
 */

struct logfs_journal_header {
	
__be32	h_crc;
	
__be16	h_len;
	
__be16	h_datalen;
	
__be16	h_type;
	
__u8	h_compr;
	
__u8	h_pad[5];
};

SIZE_CHECK(logfs_journal_header, 16);

/*
 * Life expectency of data.
 * VIM_DEFAULT          - default vim
 * VIM_SEGFILE          - for segment file only - very short-living
 * VIM_GC               - GC'd data - likely long-living
 */

enum logfs_vim {
	
VIM_DEFAULT	= 0,
	
VIM_SEGFILE	= 1,
};

/**
 * struct logfs_je_area - wbuf header
 *
 * @segno:                      segment number of area
 * @used_bytes:                 number of bytes already used
 * @gc_level:                   GC level
 * @vim:                        life expectancy of data
 *
 * "Areas" are segments currently being used for writing.  There is at least
 * one area per GC level.  Several may be used to separate long-living from
 * short-living data.  If an area with unknown vim is encountered, it can
 * simply be closed.
 * The write buffer immediately follow this header.
 */

struct logfs_je_area {
	
__be32	segno;
	
__be32	used_bytes;
	
__u8	gc_level;
	
__u8	vim;
} __attribute__((packed));

SIZE_CHECK(logfs_je_area, 10);


#define MAX_JOURNAL_HEADER \
	(sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area))

/**
 * struct logfs_je_dynsb - dynamic superblock
 *
 * @ds_gec:                     global erase count
 * @ds_sweeper:                 current position of GC "sweeper"
 * @ds_rename_dir:              source directory ino (see dir.c documentation)
 * @ds_rename_pos:              position of source dd (see dir.c documentation)
 * @ds_victim_ino:              victims of incomplete dir operation (see dir.c)
 * @ds_victim_ino:              parent inode of victim (see dir.c)
 * @ds_used_bytes:              number of used bytes
 */

struct logfs_je_dynsb {
	
__be64	ds_gec;
	
__be64	ds_sweeper;

	
__be64	ds_rename_dir;
	
__be64	ds_rename_pos;

	
__be64	ds_victim_ino;
	
__be64	ds_victim_parent; /* XXX */

	
__be64	ds_used_bytes;
	
__be32	ds_generation;
	
__be32	pad;
};

SIZE_CHECK(logfs_je_dynsb, 64);

/**
 * struct logfs_je_anchor - anchor of filesystem tree, aka master inode
 *
 * @da_size:                    size of inode file
 * @da_last_ino:                last created inode
 * @da_used_bytes:              number of bytes used
 * @da_data:                    data pointers
 */

struct logfs_je_anchor {
	
__be64	da_size;
	
__be64	da_last_ino;

	
__be64	da_used_bytes;
	
u8	da_height;
	
u8	pad[7];

	
__be64	da_data[LOGFS_EMBEDDED_FIELDS];
};

SIZE_CHECK(logfs_je_anchor, 168);

/**
 * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
 *
 * @so_segment:                 segments used for 2nd journal
 *
 * Length of the array is given by h_len field in the header.
 */

struct logfs_je_spillout {
	
__be64	so_segment[0];
};

SIZE_CHECK(logfs_je_spillout, 0);

/**
 * struct logfs_je_journal_ec - erase counts for all journal segments
 *
 * @ec:                         erase count
 *
 * Length of the array is given by h_len field in the header.
 */

struct logfs_je_journal_ec {
	
__be32	ec[0];
};

SIZE_CHECK(logfs_je_journal_ec, 0);

/**
 * struct logfs_je_free_segments - list of free segmetns with erase count
 */

struct logfs_je_free_segments {
	
__be32	segno;
	
__be32	ec;
};

SIZE_CHECK(logfs_je_free_segments, 8);

/**
 * struct logfs_seg_alias - list of segment aliases
 */

struct logfs_seg_alias {
	
__be32	old_segno;
	
__be32	new_segno;
};

SIZE_CHECK(logfs_seg_alias, 8);

/**
 * struct logfs_obj_alias - list of object aliases
 */

struct logfs_obj_alias {
	
__be64	ino;
	
__be64	bix;
	
__be64	val;
	
u8	level;
	
u8	pad[5];
	
__be16	child_no;
};

SIZE_CHECK(logfs_obj_alias, 32);

/**
 * Compression types.
 *
 * COMPR_NONE   - uncompressed
 * COMPR_ZLIB   - compressed with zlib
 */
enum {
	
COMPR_NONE	= 0,
	
COMPR_ZLIB	= 1,
};

/*
 * Journal entries come in groups of 16.  First group contains unique
 * entries, next groups contain one entry per level
 *
 * JE_FIRST     - smallest possible journal entry number
 *
 * JEG_BASE     - base group, containing unique entries
 * JE_COMMIT    - commit entry, validates all previous entries
 * JE_DYNSB     - dynamic superblock, anything that ought to be in the
 *                superblock but cannot because it is read-write data
 * JE_ANCHOR    - anchor aka master inode aka inode file's inode
 * JE_ERASECOUNT  erasecounts for all journal segments
 * JE_SPILLOUT  - unused
 * JE_SEG_ALIAS - aliases segments
 * JE_AREA      - area description
 *
 * JE_LAST      - largest possible journal entry number
 */
enum {
	
JE_FIRST	= 0x01,

	
JEG_BASE	= 0x00,
	
JE_COMMIT	= 0x02,
	
JE_DYNSB	= 0x03,
	
JE_ANCHOR	= 0x04,
	
JE_ERASECOUNT	= 0x05,
	
JE_SPILLOUT	= 0x06,
	
JE_OBJ_ALIAS	= 0x0d,
	
JE_AREA		= 0x0e,

	
JE_LAST		= 0x0e,
};

#endif

Overall Contributors

PersonTokensPropCommitsCommitProp
joern engeljoern engel90499.56%480.00%
anand gadiyaranand gadiyar40.44%120.00%
Total908100.00%5100.00%
Directory: fs/logfs
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
{% endraw %}