Release 4.7 fs/logfs/logfs_abi.h

Directory: fs/logfs
/*
 * fs/logfs/logfs_abi.h
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Public header for logfs.
 */
#ifndef FS_LOGFS_LOGFS_ABI_H

#define FS_LOGFS_LOGFS_ABI_H

/*
 * For out-of-kernel compiles.
 *
 * The kernel supplies its own BUILD_BUG_ON().  When it is missing, fall back
 * to a portable compile-time check rather than discarding the condition, so
 * that the SIZE_CHECK() invocations in this header keep guarding the
 * on-medium structure sizes.  A true (non-zero) constant @condition yields a
 * negative array size and therefore a compile error; a false one compiles to
 * nothing.
 */
#ifndef BUILD_BUG_ON
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
#endif


/*
 * SIZE_CHECK(type, size) - compile-time size guard for struct @type.
 *
 * Expands to a static inline function check_<type>() whose only statement is
 * a BUILD_BUG_ON() that trips when sizeof(struct <type>) differs from @size.
 * The generated function provides the function scope the BUILD_BUG_ON()
 * statement needs.
 */
#define SIZE_CHECK(type, size)						\
static inline void check_##type(void)					\
{									\
	BUILD_BUG_ON((size) != sizeof(struct type));			\
}

/*
 * Throughout the logfs code, we're constantly dealing with blocks at
 * various positions or offsets.  To remove confusion, we strictly
 * distinguish between a "position" - the logical position within a
 * file and an "offset" - the physical location within the device.
 *
 * Any usage of the term offset for a logical location or position for
 * a physical one is a bug and should get fixed.
 */

/*
 * Blocks are allocated in one of several segments depending on their
 * level.  The following levels are used:
 *  0   - regular data block
 *  1   - i1 indirect blocks
 *  2   - i2 indirect blocks
 *  3   - i3 indirect blocks
 *  4   - i4 indirect blocks
 *  5   - i5 indirect blocks
 *  6   - ifile data blocks
 *  7   - ifile i1 indirect blocks
 *  8   - ifile i2 indirect blocks
 *  9   - ifile i3 indirect blocks
 * 10   - ifile i4 indirect blocks
 * 11   - ifile i5 indirect blocks
 * Potential levels to be used in the future:
 * 12   - gc recycled blocks, long-lived data
 * 13   - replacement blocks, short-lived data
 *
 * Levels 1-11 are necessary for robust gc operations and help separate
 * short-lived metadata from longer-lived file data.  In the future,
 * file data should get separated into several segments based on simple
 * heuristics.  Old data recycled during gc operation is expected to be
 * long-lived.  New data is of uncertain life expectancy.  New data
 * used to replace older blocks in existing files is expected to be
 * short-lived.
 */


/*
 * Magic numbers.
 *
 * LOGFS_MAGIC      - 64bit value, for the superblock
 * LOGFS_MAGIC_U32  - 32bit value, for statfs f_type
 */
#define LOGFS_MAGIC		0x7a3a8e5cb9d5bf67ULL
#define LOGFS_MAGIC_U32		0xc97e8168U

/*
 * Blocksize related macros.
 *
 * The blocksize is currently fixed at 4KiB.  Sooner or later it should
 * become configurable, at which point these macros get replaced by
 * something superblock-dependent.  Pointers in indirect blocks are, and
 * will remain, 64bit wide.
 *
 * LOGFS_BLOCKSIZE      - size of a block in bytes
 * LOGFS_BLOCK_FACTOR   - number of pointers per indirect block
 * LOGFS_BLOCK_BITS     - log2 of LOGFS_BLOCK_FACTOR, used for shifts
 */
#define LOGFS_BLOCKSIZE		(4096ULL)
#define LOGFS_BLOCK_FACTOR	(LOGFS_BLOCKSIZE / sizeof(u64))
#define LOGFS_BLOCK_BITS	(9)

/*
 * Number of blocks reachable at each level of indirection.  There are 16
 * direct block pointers plus a single indirect pointer.
 *
 * I0_BLOCKS              - blocks reachable through the direct pointers
 * I1_BLOCKS..I5_BLOCKS   - LOGFS_BLOCK_FACTOR raised to the power 1..5
 * INDIRECT_INDEX         - equals I0_BLOCKS; presumably the slot of the
 *                          indirect pointer, right behind the direct
 *                          pointers (confirm against users)
 * LOGFS_EMBEDDED_FIELDS  - direct pointers plus the one indirect pointer
 */
#define I0_BLOCKS		(16)
#define I1_BLOCKS		LOGFS_BLOCK_FACTOR
#define I2_BLOCKS		(I1_BLOCKS * LOGFS_BLOCK_FACTOR)
#define I3_BLOCKS		(I2_BLOCKS * LOGFS_BLOCK_FACTOR)
#define I4_BLOCKS		(I3_BLOCKS * LOGFS_BLOCK_FACTOR)
#define I5_BLOCKS		(I4_BLOCKS * LOGFS_BLOCK_FACTOR)

#define INDIRECT_INDEX		I0_BLOCKS
#define LOGFS_EMBEDDED_FIELDS	(I0_BLOCKS + 1)

/*
 * Sizes at which files require another level of indirection.
 *
 * Files smaller than LOGFS_EMBEDDED_SIZE can be stored completely inside
 * the inode itself, much like ext2 fast symlinks.
 *
 * Data at a position smaller than LOGFS_I0_SIZE is accessed through the
 * direct pointers, otherwise through the 1x indirect pointer, and so forth
 * for the higher levels.
 */
#define LOGFS_EMBEDDED_SIZE	(sizeof(u64) * LOGFS_EMBEDDED_FIELDS)
#define LOGFS_I0_SIZE		(LOGFS_BLOCKSIZE * I0_BLOCKS)
#define LOGFS_I1_SIZE		(LOGFS_BLOCKSIZE * I1_BLOCKS)
#define LOGFS_I2_SIZE		(LOGFS_BLOCKSIZE * I2_BLOCKS)
#define LOGFS_I3_SIZE		(LOGFS_BLOCKSIZE * I3_BLOCKS)
#define LOGFS_I4_SIZE		(LOGFS_BLOCKSIZE * I4_BLOCKS)
#define LOGFS_I5_SIZE		(LOGFS_BLOCKSIZE * I5_BLOCKS)

/*
 * Each indirect block pointer must have this flag set, if all block pointers
 * behind it are set, i.e. there is no hole hidden in the shadow of this
 * indirect block pointer.
 *
 * LOGFS_FULLY_POPULATED - the flag itself, bit 63 of the 64bit pointer
 * pure_ofs(ofs)         - @ofs with the LOGFS_FULLY_POPULATED bit cleared
 *
 * The argument is parenthesized so that compound expressions such as
 * pure_ofs(a | b) are masked as a whole rather than only their last operand.
 */
#define LOGFS_FULLY_POPULATED (1ULL << 63)
#define pure_ofs(ofs) ((ofs) & ~LOGFS_FULLY_POPULATED)

/*
 * LogFS needs to separate data into levels.  A level is defined as the
 * maximal possible distance from the master inode (the inode of the inode
 * file).  Data blocks reside on level 0, 1x indirect blocks on level 1, etc.
 * Inodes reside on level 6, indirect blocks of the inode file on levels
 * 7-11.  This effort is necessary to guarantee that garbage collection
 * always makes progress.
 *
 * LOGFS_MAX_INDIRECT - maximal indirection through indirect blocks
 * LOGFS_MAX_LEVELS   - one more, for the actual data level of a file; the
 *                      maximal number of levels for one file
 * LOGFS_NO_AREAS     - twice that, as the inode file and regular files are
 *                      effectively stacked on top of each other
 */
#define LOGFS_MAX_INDIRECT	(5)
#define LOGFS_MAX_LEVELS	(LOGFS_MAX_INDIRECT + 1)
#define LOGFS_NO_AREAS		(2 * LOGFS_MAX_LEVELS)

/*
 * Miscellaneous limits.
 *
 * LOGFS_MAX_NAMELEN   - maximum size of filenames
 * LOGFS_JOURNAL_SEGS  - number of segments in the primary journal
 * MAX_CACHED_SEGS     - maximum number of free/erased/etc. segments in
 *                       journal entries
 */
#define LOGFS_MAX_NAMELEN	(255)
#define LOGFS_JOURNAL_SEGS	(16)
#define MAX_CACHED_SEGS		(64)


/*
 * Object store geometry.
 *
 * LOGFS_OBJECT_HEADERSIZE  - size of a single header in the object store
 * LOGFS_SEGMENT_HEADERSIZE - size of struct logfs_segment_header
 * LOGFS_MAX_OBJECTSIZE     - size of the largest possible object, including
 *                            its header
 * LOGFS_SEGMENT_RESERVE    - amount of space reserved in each segment for
 *                            its segment header and for the padded space at
 *                            the end when no further objects fit
 */
#define LOGFS_OBJECT_HEADERSIZE		(0x1c)
#define LOGFS_SEGMENT_HEADERSIZE	(0x18)
#define LOGFS_MAX_OBJECTSIZE		(LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)
#define LOGFS_SEGMENT_RESERVE		\
	(LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)

/*
 * Segment types, stored in the type field of struct logfs_segment_header:
 * SEG_SUPER    - segment used for the superblock
 * SEG_JOURNAL  - segment used for the journal
 * SEG_OSTORE   - segment used for the object store
 *
 * NOTE(review): the meanings above are inferred from the constant names;
 * confirm against the code that writes segment headers.
 */
enum {
	SEG_SUPER	= 0x01,
	SEG_JOURNAL	= 0x02,
	SEG_OSTORE	= 0x03,
};

/**
 * struct logfs_segment_header - per-segment header in the ostore
 *
 * @crc:	crc32 of header (there is no data)
 * @pad:	unused, must be 0
 * @type:	segment type, one of the SEG_* values
 * @level:	GC level for all objects in this segment
 * @segno:	segment number
 * @ec:		erase count for this segment
 * @gec:	global erase count at time of writing
 *
 * The structure is LOGFS_SEGMENT_HEADERSIZE bytes large, which the
 * SIZE_CHECK() below enforces.
 */
struct logfs_segment_header {
	__be32	crc;
	__be16	pad;
	__u8	type;
	__u8	level;
	__be32	segno;
	__be32	ec;
	__be64	gec;
};

SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);


/*
 * Feature masks, one per ds_feature_* class in struct logfs_disk_super.
 * All three are currently 0, i.e. no feature bits are defined here.
 * NOTE(review): presumably the sets of feature bits this implementation
 * understands - confirm against the mount code.
 */
#define LOGFS_FEATURES_INCOMPAT		(0ULL)
#define LOGFS_FEATURES_RO_COMPAT	(0ULL)
#define LOGFS_FEATURES_COMPAT		(0ULL)

/**
 * struct logfs_disk_super - on-medium superblock
 *
 * @ds_sh:			segment header in front of the superblock
 *				fields proper
 * @ds_magic:			magic number, must equal LOGFS_MAGIC
 * @ds_crc:			crc32 of structure starting with the next field
 * @ds_ifile_levels:		maximum number of levels for ifile
 * @ds_iblock_levels:		maximum number of levels for regular files
 * @ds_data_levels:		number of separate levels for data
 * @ds_segment_shift:		log2 of segment size
 * @ds_block_shift:		log2 of block size
 * @ds_write_shift:		log2 of write size
 * @pad0:			reserved, must be 0
 * @ds_filesystem_size:		size of the filesystem (presumably in bytes -
 *				TODO confirm)
 * @ds_segment_size:		size of a segment (presumably in bytes, cf.
 *				@ds_segment_shift - TODO confirm)
 * @ds_bad_seg_reserve:		number of segments reserved to handle bad blocks
 * @ds_feature_incompat:	incompatible filesystem features
 * @ds_feature_ro_compat:	read-only compatible filesystem features
 * @ds_feature_compat:		compatible filesystem features
 * @ds_feature_flags:		flags
 * @ds_root_reserve:		bytes reserved for the superuser
 * @ds_speed_reserve:		bytes reserved to speed up GC
 * @ds_journal_seg:		segments used by primary journal
 * @ds_super_ofs:		two 64bit offsets; presumably the locations of
 *				the superblock copies - TODO confirm
 * @pad3:			reserved, must be 0
 *
 * Contains only read-only fields.  Read-write fields like the amount of used
 * space are tracked in the dynamic superblock, which is stored in the
 * journal.  The structure is 256 bytes large, enforced by the SIZE_CHECK()
 * below.
 */
struct logfs_disk_super {
	struct logfs_segment_header ds_sh;
	__be64	ds_magic;

	__be32	ds_crc;
	__u8	ds_ifile_levels;
	__u8	ds_iblock_levels;
	__u8	ds_data_levels;
	__u8	ds_segment_shift;
	__u8	ds_block_shift;
	__u8	ds_write_shift;
	__u8	pad0[6];

	__be64	ds_filesystem_size;
	__be32	ds_segment_size;
	__be32	ds_bad_seg_reserve;

	__be64	ds_feature_incompat;
	__be64	ds_feature_ro_compat;

	__be64	ds_feature_compat;
	__be64	ds_feature_flags;

	__be64	ds_root_reserve;
	__be64	ds_speed_reserve;

	__be32	ds_journal_seg[LOGFS_JOURNAL_SEGS];

	__be64	ds_super_ofs[2];
	__be64	pad3[8];
};

SIZE_CHECK(logfs_disk_super, 256);

/*
 * Object types, stored in the type field of struct logfs_object_header:
 * OBJ_BLOCK    - data or indirect block
 * OBJ_INODE    - inode
 * OBJ_DENTRY   - dentry
 */
enum {
	OBJ_BLOCK	= 0x04,
	OBJ_INODE	= 0x05,
	OBJ_DENTRY	= 0x06,
};

/**
 * struct logfs_object_header - per-object header in the ostore
 *
 * @crc:	crc32 of header, excluding data_crc
 * @len:	length of data
 * @type:	object type, one of the OBJ_* values
 * @compr:	compression type
 * @ino:	inode number
 * @bix:	block index
 * @data_crc:	crc32 of payload
 *
 * The structure is packed and LOGFS_OBJECT_HEADERSIZE bytes large, which the
 * SIZE_CHECK() below enforces.
 */
struct logfs_object_header {
	__be32	crc;
	__be16	len;
	__u8	type;
	__u8	compr;
	__be64	ino;
	__be64	bix;
	__be32	data_crc;
} __attribute__((packed));

SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);

/*
 * Reserved inode numbers:
 * LOGFS_INO_MAPPING    - inode number 0; purpose not described in this
 *                        header (NOTE(review): confirm against users)
 * LOGFS_INO_MASTER     - master inode (for inode file)
 * LOGFS_INO_ROOT       - root directory
 * LOGFS_INO_SEGFILE    - per-segment used bytes and erase count
 * LOGFS_RESERVED_INOS  - 0x10; presumably the count of reserved inode
 *                        numbers (NOTE(review): confirm)
 */
enum {
	LOGFS_INO_MAPPING	= 0x00,
	LOGFS_INO_MASTER	= 0x01,
	LOGFS_INO_ROOT		= 0x02,
	LOGFS_INO_SEGFILE	= 0x03,
	LOGFS_RESERVED_INOS	= 0x10,
};

/*
 * Inode flags.
 *
 * High bits should never be written to the medium; they are reserved for
 * in-memory usage.  Low bits should either remain in sync with the
 * corresponding FS_*_FL or reuse slots that obviously don't make sense for
 * logfs.
 *
 * LOGFS_IF_COMPRESSED  Same value as FS_COMPR_FL
 * LOGFS_IF_DIRTY       Inode must be written back
 * LOGFS_IF_ZOMBIE      Inode has been deleted
 * LOGFS_IF_STILLBORN   -ENOSPC happened when creating inode
 */
#define LOGFS_IF_COMPRESSED	0x00000004 /* == FS_COMPR_FL */
#define LOGFS_IF_DIRTY		0x20000000
#define LOGFS_IF_ZOMBIE		0x40000000
#define LOGFS_IF_STILLBORN	0x80000000

/* Flags available to chattr */
#define LOGFS_FL_USER_VISIBLE	(LOGFS_IF_COMPRESSED)
#define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)

/* Flags inherited from parent directory on file/directory creation */
#define LOGFS_FL_INHERITED	(LOGFS_IF_COMPRESSED)

/**
 * struct logfs_disk_inode - on-medium inode
 *
 * @di_mode:		file mode
 * @di_height:		presumably the height of the inode's tree of
 *			indirect blocks - TODO confirm
 * @di_pad:		reserved, must be 0
 * @di_flags:		inode flags, see the LOGFS_IF_* definitions
 * @di_uid:		user id
 * @di_gid:		group id
 * @di_ctime:		change time
 * @di_mtime:		modify time
 * @di_atime:		presumably the access time, by analogy with
 *			@di_ctime and @di_mtime - TODO confirm
 * @di_refcount:	reference count (aka nlink or link count)
 * @di_generation:	inode generation, for nfs
 * @di_used_bytes:	number of bytes used
 * @di_size:		file size
 * @di_data:		data pointers, LOGFS_EMBEDDED_FIELDS of them
 *
 * The structure is 200 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_disk_inode {
	__be16	di_mode;
	__u8	di_height;
	__u8	di_pad;
	__be32	di_flags;
	__be32	di_uid;
	__be32	di_gid;

	__be64	di_ctime;
	__be64	di_mtime;

	__be64	di_atime;
	__be32	di_refcount;
	__be32	di_generation;

	__be64	di_used_bytes;
	__be64	di_size;

	__be64	di_data[LOGFS_EMBEDDED_FIELDS];
};

SIZE_CHECK(logfs_disk_inode, 200);


/*
 * Positions of selected struct logfs_disk_inode fields, counted in units of
 * sizeof(__be64) from the start of the structure rather than in bytes.
 *
 * INODE_POINTER_OFS  - position of di_data
 * INODE_USED_OFS     - position of di_used_bytes
 * INODE_SIZE_OFS     - position of di_size
 * INODE_HEIGHT_OFS   - hard-coded 0; di_height sits inside the first
 *                      64bit unit of the structure
 */
#define INODE_POINTER_OFS	\
	(offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64))
#define INODE_USED_OFS		\
	(offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64))
#define INODE_SIZE_OFS		\
	(offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64))
#define INODE_HEIGHT_OFS	(0)

/**
 * struct logfs_disk_dentry - on-medium dentry structure
 *
 * @ino:	inode number
 * @namelen:	length of file name
 * @type:	file type, identical to bits 12..15 of mode
 * @name:	file name, up to LOGFS_MAX_NAMELEN bytes
 *
 * The structure is packed and 266 bytes large, enforced by the SIZE_CHECK()
 * below.
 */
/* FIXME: add 6 bytes of padding to remove the __packed */
struct logfs_disk_dentry {
	__be64	ino;
	__be16	namelen;
	__u8	type;
	__u8	name[LOGFS_MAX_NAMELEN];
} __attribute__((packed));

SIZE_CHECK(logfs_disk_dentry, 266);


/*
 * Sentinel values used in struct logfs_segment_entry; both are all-ones
 * 32bit patterns.
 */
#define RESERVED		0xffffffff	/* for the valid field */
#define BADSEG			0xffffffff	/* for the ec_level field */
/**
 * struct logfs_segment_entry - segment file entry
 *
 * @ec_level:	erase count and level
 * @valid:	number of valid bytes
 *
 * The segment file contains one entry for every segment.  @ec_level holds
 * the erasecount in the upper 28 bits and the level in the lower 4 bits; an
 * @ec_level of BADSEG (-1) identifies bad segments.  @valid holds the number
 * of valid bytes, or RESERVED (-1 again) if the segment is used for either
 * the superblock or the journal, or when the segment is bad.
 *
 * The structure is 8 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_segment_entry {
	__be32	ec_level;
	__be32	valid;
};

SIZE_CHECK(logfs_segment_entry, 8);

/**
 * struct logfs_journal_header - header for journal entries (JEs)
 *
 * @h_crc:	crc32 of journal entry
 * @h_len:	length of compressed journal entry, not including header
 * @h_datalen:	length of uncompressed data
 * @h_type:	JE type
 * @h_compr:	compression type
 * @h_pad:	reserved
 *
 * The structure is 16 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_journal_header {
	__be32	h_crc;
	__be16	h_len;
	__be16	h_datalen;
	__be16	h_type;
	__u8	h_compr;
	__u8	h_pad[5];
};

SIZE_CHECK(logfs_journal_header, 16);

/*
 * Life expectancy of data.
 * VIM_DEFAULT          - default vim
 * VIM_SEGFILE          - for segment file only - very short-living
 *
 * No VIM_GC value (GC'd data - likely long-living) is defined by this enum.
 */
enum logfs_vim {
	VIM_DEFAULT	= 0,
	VIM_SEGFILE	= 1,
};

/**
 * struct logfs_je_area - wbuf header
 *
 * @segno:	segment number of area
 * @used_bytes:	number of bytes already used
 * @gc_level:	GC level
 * @vim:	life expectancy of data
 *
 * "Areas" are segments currently being used for writing.  There is at least
 * one area per GC level.  Several may be used to separate long-living from
 * short-living data.  If an area with unknown vim is encountered, it can
 * simply be closed.
 * The write buffer immediately follows this header.
 *
 * The structure is packed and 10 bytes large, enforced by the SIZE_CHECK()
 * below.
 */
struct logfs_je_area {
	__be32	segno;
	__be32	used_bytes;
	__u8	gc_level;
	__u8	vim;
} __attribute__((packed));

SIZE_CHECK(logfs_je_area, 10);


/*
 * Combined size of a journal entry header and an area (wbuf) header.
 */
#define MAX_JOURNAL_HEADER	\
	(sizeof(struct logfs_je_area) + sizeof(struct logfs_journal_header))

/**
 * struct logfs_je_dynsb - dynamic superblock
 *
 * @ds_gec:		global erase count
 * @ds_sweeper:		current position of GC "sweeper"
 * @ds_rename_dir:	source directory ino (see dir.c documentation)
 * @ds_rename_pos:	position of source dd (see dir.c documentation)
 * @ds_victim_ino:	victims of incomplete dir operation (see dir.c)
 * @ds_victim_parent:	parent inode of victim (see dir.c)
 * @ds_used_bytes:	number of used bytes
 * @ds_generation:	not described in this header; presumably a
 *			generation counter - TODO confirm
 * @pad:		padding; presumably reserved - TODO confirm
 *
 * The structure is 64 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_je_dynsb {
	__be64	ds_gec;
	__be64	ds_sweeper;

	__be64	ds_rename_dir;
	__be64	ds_rename_pos;

	__be64	ds_victim_ino;
	__be64	ds_victim_parent; /* XXX */

	__be64	ds_used_bytes;
	__be32	ds_generation;
	__be32	pad;
};

SIZE_CHECK(logfs_je_dynsb, 64);

/**
 * struct logfs_je_anchor - anchor of filesystem tree, aka master inode
 *
 * @da_size:		size of inode file
 * @da_last_ino:	last created inode
 * @da_used_bytes:	number of bytes used
 * @da_height:		not described in this header; presumably the height
 *			of the inode file's tree - TODO confirm
 * @pad:		padding up to the next 64bit boundary
 * @da_data:		data pointers, LOGFS_EMBEDDED_FIELDS of them
 *
 * The structure is 168 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_je_anchor {
	__be64	da_size;
	__be64	da_last_ino;

	__be64	da_used_bytes;
	u8	da_height;
	u8	pad[7];

	__be64	da_data[LOGFS_EMBEDDED_FIELDS];
};

SIZE_CHECK(logfs_je_anchor, 168);

/**
 * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
 *
 * @so_segment:	segments used for 2nd journal
 *
 * Length of the array is given by h_len field in the header.  The array is
 * declared with zero elements, so the structure itself has size 0, which
 * the SIZE_CHECK() below enforces.
 */
struct logfs_je_spillout {
	__be64	so_segment[0];
};

SIZE_CHECK(logfs_je_spillout, 0);

/**
 * struct logfs_je_journal_ec - erase counts for all journal segments
 *
 * @ec:	erase count
 *
 * Length of the array is given by h_len field in the header.  The array is
 * declared with zero elements, so the structure itself has size 0, which
 * the SIZE_CHECK() below enforces.
 */
struct logfs_je_journal_ec {
	__be32	ec[0];
};

SIZE_CHECK(logfs_je_journal_ec, 0);

/**
 * struct logfs_je_free_segments - list of free segments with erase count
 *
 * @segno:	segment number
 * @ec:		erase count
 *
 * The structure is 8 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_je_free_segments {
	__be32	segno;
	__be32	ec;
};

SIZE_CHECK(logfs_je_free_segments, 8);

/**
 * struct logfs_seg_alias - list of segment aliases
 *
 * @old_segno:	presumably the segment number being aliased - TODO confirm
 * @new_segno:	presumably the segment number it maps to - TODO confirm
 *
 * The structure is 8 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_seg_alias {
	__be32	old_segno;
	__be32	new_segno;
};

SIZE_CHECK(logfs_seg_alias, 8);

/**
 * struct logfs_obj_alias - list of object aliases
 *
 * @ino:	presumably inode number, as in struct logfs_object_header -
 *		TODO confirm
 * @bix:	presumably block index, as in struct logfs_object_header -
 *		TODO confirm
 * @val:	not described in this header - TODO document
 * @level:	not described in this header; presumably a level as used
 *		throughout logfs - TODO confirm
 * @pad:	padding
 * @child_no:	not described in this header - TODO document
 *
 * The structure is 32 bytes large, enforced by the SIZE_CHECK() below.
 */
struct logfs_obj_alias {
	__be64	ino;
	__be64	bix;
	__be64	val;
	u8	level;
	u8	pad[5];
	__be16	child_no;
};

SIZE_CHECK(logfs_obj_alias, 32);

/*
 * Compression types:
 * COMPR_NONE   - uncompressed
 * COMPR_ZLIB   - compressed with zlib
 */
enum {
	COMPR_NONE	= 0,
	COMPR_ZLIB	= 1,
};

/*
 * Journal entries come in groups of 16.  The first group contains unique
 * entries, the following groups contain one entry per level.
 *
 * JE_FIRST     - smallest possible journal entry number
 *
 * JEG_BASE     - base group, containing unique entries
 * JE_COMMIT    - commit entry, validates all previous entries
 * JE_DYNSB     - dynamic superblock, anything that ought to be in the
 *                superblock but cannot because it is read-write data
 * JE_ANCHOR    - anchor aka master inode aka inode file's inode
 * JE_ERASECOUNT  erasecounts for all journal segments
 * JE_SPILLOUT  - unused
 * JE_OBJ_ALIAS - object aliases (presumably struct logfs_obj_alias
 *                entries - TODO confirm)
 * JE_AREA      - area description
 *
 * JE_LAST      - largest possible journal entry number
 *
 * NOTE(review): JE_FIRST is 0x01, yet no entry in this list has the value
 * 0x01 (JE_COMMIT is 0x02) - confirm this is intended.
 */
enum {
	JE_FIRST	= 0x01,

	JEG_BASE	= 0x00,
	JE_COMMIT	= 0x02,
	JE_DYNSB	= 0x03,
	JE_ANCHOR	= 0x04,
	JE_ERASECOUNT	= 0x05,
	JE_SPILLOUT	= 0x06,
	JE_OBJ_ALIAS	= 0x0d,
	JE_AREA		= 0x0e,

	JE_LAST		= 0x0e,
};

#endif
Overall Contributors
Person Tokens Prop Commits CommitProp
joern engel joern engel 904 99.56% 4 80.00%
anand gadiyar anand gadiyar 4 0.44% 1 20.00%
Total 908 100.00% 5 100.00%
  Directory: fs/logfs

Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
	Person	Tokens	Prop	Commits	CommitProp
joern engel	joern engel	904	99.56%	4	80.00%
anand gadiyar	anand gadiyar	4	0.44%	1	20.00%
	Total	908	100.00%	5	100.00%
cregit-Linux how code gets into the kernel

Release 4.7 fs/logfs/logfs_abi.h

Overall Contributors