Release 4.11 net/ceph/osdmap.c
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/div64.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/decode.h>
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>
/*
 * Format a human-readable description of an OSD state bitmask.
 *
 * @str:   destination buffer
 * @len:   size of @str in bytes; when 0 the buffer is left untouched
 * @state: bitmask tested against CEPH_OSD_EXISTS and CEPH_OSD_UP
 *
 * Returns @str so the call can be used directly as a printk argument.
 */
char *ceph_osdmap_state_str(char *str, int len, int state)
{
	const char *desc;

	if (len == 0)
		return str;

	if (!(state & CEPH_OSD_EXISTS))
		desc = (state & CEPH_OSD_UP) ? "up" : "doesn't exist";
	else
		desc = (state & CEPH_OSD_UP) ? "exists, up" : "exists";

	snprintf(str, len, "%s", desc);
	return str;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 69 | 75.00% | 1 | 50.00% |
Cong Ding | 23 | 25.00% | 1 | 50.00% |
Total | 92 | 100.00% | 2 | 100.00% |
/* maps */
/*
 * Return the number of bits needed to represent @t, i.e. the position
 * of its highest set bit counted from 1 (0 for t == 0).
 */
static int calc_bits_of(unsigned int t)
{
	int bits;

	for (bits = 0; t != 0; t >>= 1)
		bits++;

	return bits;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 32 | 96.97% | 1 | 50.00% |
Eric Dumazet | 1 | 3.03% | 1 | 50.00% |
Total | 33 | 100.00% | 2 | 100.00% |
/*
 * Recompute pg_num_mask and pgp_num_mask from pg_num and pgp_num.
 *
 * Each mask is the smallest value of the form 2^n - 1 that is
 * >= (count - 1); e.g. a count of 8 gives mask 7 and a count of 9
 * gives mask 15.
 */
static void calc_pg_masks(struct ceph_pg_pool_info *pi)
{
	int pg_bits = calc_bits_of(pi->pg_num - 1);
	int pgp_bits = calc_bits_of(pi->pgp_num - 1);

	pi->pg_num_mask = (1 << pg_bits) - 1;
	pi->pgp_num_mask = (1 << pgp_bits) - 1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 49 | 100.00% | 1 | 100.00% |
Total | 49 | 100.00% | 1 | 100.00% |
/*
* decode crush map
*/
static int crush_decode_uniform_bucket(void **p, void *end,
struct crush_bucket_uniform *b)
{
dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
b->item_weight = ceph_decode_32(p);
return 0;
bad:
return -EINVAL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 72 | 100.00% | 2 | 100.00% |
Total | 72 | 100.00% | 2 | 100.00% |
static int crush_decode_list_bucket(void **p, void *end,
struct crush_bucket_list *b)
{
int j;
dout("crush_decode_list_bucket %p to %p\n", *p, end);
b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->item_weights == NULL)
return -ENOMEM;
b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->sum_weights == NULL)
return -ENOMEM;
ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
for (j = 0; j < b->h.size; j++) {
b->item_weights[j] = ceph_decode_32(p);
b->sum_weights[j] = ceph_decode_32(p);
}
return 0;
bad:
return -EINVAL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 171 | 100.00% | 2 | 100.00% |
Total | 171 | 100.00% | 2 | 100.00% |
static int crush_decode_tree_bucket(void **p, void *end,
struct crush_bucket_tree *b)
{
int j;
dout("crush_decode_tree_bucket %p to %p\n", *p, end);
ceph_decode_8_safe(p, end, b->num_nodes, bad);
b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
if (b->node_weights == NULL)
return -ENOMEM;
ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
for (j = 0; j < b->num_nodes; j++)
b->node_weights[j] = ceph_decode_32(p);
return 0;
bad:
return -EINVAL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 129 | 99.23% | 2 | 66.67% |
Ilya Dryomov | 1 | 0.77% | 1 | 33.33% |
Total | 130 | 100.00% | 3 | 100.00% |
static int crush_decode_straw_bucket(void **p, void *end,
struct crush_bucket_straw *b)
{
int j;
dout("crush_decode_straw_bucket %p to %p\n", *p, end);
b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->item_weights == NULL)
return -ENOMEM;
b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->straws == NULL)
return -ENOMEM;
ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
for (j = 0; j < b->h.size; j++) {
b->item_weights[j] = ceph_decode_32(p);
b->straws[j] = ceph_decode_32(p);
}
return 0;
bad:
return -EINVAL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 171 | 100.00% | 2 | 100.00% |
Total | 171 | 100.00% | 2 | 100.00% |
static int crush_decode_straw2_bucket(void **p, void *end,
struct crush_bucket_straw2 *b)
{
int j;
dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
if (b->item_weights == NULL)
return -ENOMEM;
ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
for (j = 0; j < b->h.size; j++)
b->item_weights[j] = ceph_decode_32(p);
return 0;
bad:
return -EINVAL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilya Dryomov | 123 | 100.00% | 1 | 100.00% |
Total | 123 | 100.00% | 1 | 100.00% |
static int skip_name_map(void **p, void *end)
{
int len;
ceph_decode_32_safe(p, end, len ,bad);
while (len--) {
int strlen;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, strlen, bad);
*p += strlen;
}
return 0;
bad:
return -EINVAL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 72 | 100.00% | 1 | 100.00% |
Total | 72 | 100.00% | 1 | 100.00% |
static void crush_finalize(struct crush_map *c)
{
__s32 b;
/* Space for the array of pointers to per-bucket workspace */
c->working_size = sizeof(struct crush_work) +
c->max_buckets * sizeof(struct crush_work_bucket *);
for (b = 0; b < c->max_buckets; b++) {
if (!c->buckets[b])
continue;
switch (c->buckets[b]->alg) {
default:
/*
* The base case, permutation variables and
* the pointer to the permutation array.
*/
c->working_size += sizeof(struct crush_work_bucket);
break;
}
/* Every bucket has a permutation array. */
c->working_size += c->buckets[b]->size * sizeof(__u32);
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilya Dryomov | 109 | 100.00% | 1 | 100.00% |
Total | 109 | 100.00% | 1 | 100.00% |
/*
 * Decode an encoded CRUSH map occupying [pbyval, end).
 *
 * Layout consumed, in order: magic, max_buckets, max_rules,
 * max_devices; max_buckets bucket slots (a u32 alg of 0 marks an empty
 * slot); max_rules rule slots (a u32 of 0 marks an empty slot); three
 * name maps, which are skipped; and finally a series of optional
 * tunables.  A buffer that ends anywhere after the rules is accepted:
 * tunables that are not present keep the defaults set below.
 *
 * Returns the new map, or an ERR_PTR():
 *   -ENOMEM  an allocation failed
 *   -EINVAL  bad magic, unknown or inconsistent bucket algorithm, or a
 *            truncated buffer
 * On failure everything allocated so far is released via
 * crush_destroy().
 */
static struct crush_map *crush_decode(void *pbyval, void *end)
{
	struct crush_map *c;
	int err;
	int i, j;
	void **p = &pbyval;
	void *start = pbyval;
	u32 magic;
	u32 num_name_maps;

	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));

	c = kzalloc(sizeof(*c), GFP_NOFS);
	if (c == NULL)
		return ERR_PTR(-ENOMEM);

	/* set tunables to default values */
	c->choose_local_tries = 2;
	c->choose_local_fallback_tries = 5;
	c->choose_total_tries = 19;
	c->chooseleaf_descend_once = 0;

	ceph_decode_need(p, end, 4*sizeof(u32), bad);
	magic = ceph_decode_32(p);
	if (magic != CRUSH_MAGIC) {
		pr_err("crush_decode magic %x != current %x\n",
		       (unsigned int)magic, (unsigned int)CRUSH_MAGIC);
		goto bad;
	}
	c->max_buckets = ceph_decode_32(p);
	c->max_rules = ceph_decode_32(p);
	c->max_devices = ceph_decode_32(p);

	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
	if (c->buckets == NULL)
		goto badmem;
	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
	if (c->rules == NULL)
		goto badmem;

	/* buckets */
	for (i = 0; i < c->max_buckets; i++) {
		int size = 0;
		u32 alg;
		struct crush_bucket *b;

		ceph_decode_32_safe(p, end, alg, bad);
		if (alg == 0) {
			c->buckets[i] = NULL;
			continue;
		}
		dout("crush_decode bucket %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		switch (alg) {
		case CRUSH_BUCKET_UNIFORM:
			size = sizeof(struct crush_bucket_uniform);
			break;
		case CRUSH_BUCKET_LIST:
			size = sizeof(struct crush_bucket_list);
			break;
		case CRUSH_BUCKET_TREE:
			size = sizeof(struct crush_bucket_tree);
			break;
		case CRUSH_BUCKET_STRAW:
			size = sizeof(struct crush_bucket_straw);
			break;
		case CRUSH_BUCKET_STRAW2:
			size = sizeof(struct crush_bucket_straw2);
			break;
		default:
			goto bad;
		}
		BUG_ON(size == 0);
		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
		if (b == NULL)
			goto badmem;

		ceph_decode_need(p, end, 4*sizeof(u32), bad);
		b->id = ceph_decode_32(p);
		b->type = ceph_decode_16(p);
		b->alg = ceph_decode_8(p);
		b->hash = ceph_decode_8(p);
		b->weight = ceph_decode_32(p);
		b->size = ceph_decode_32(p);

		/*
		 * The allocation above was sized from the leading alg
		 * word, while the per-algorithm decoder below is picked
		 * from the alg stored in the bucket header.  If the two
		 * disagree a decoder could write past the end of a
		 * smaller allocation, so reject such a map.
		 */
		if (b->alg != alg)
			goto bad;

		dout("crush_decode bucket size %d off %x %p to %p\n",
		     b->size, (int)(*p-start), *p, end);

		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
		if (b->items == NULL)
			goto badmem;

		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
		for (j = 0; j < b->size; j++)
			b->items[j] = ceph_decode_32(p);

		switch (b->alg) {
		case CRUSH_BUCKET_UNIFORM:
			err = crush_decode_uniform_bucket(p, end,
				  (struct crush_bucket_uniform *)b);
			break;
		case CRUSH_BUCKET_LIST:
			err = crush_decode_list_bucket(p, end,
			       (struct crush_bucket_list *)b);
			break;
		case CRUSH_BUCKET_TREE:
			err = crush_decode_tree_bucket(p, end,
				(struct crush_bucket_tree *)b);
			break;
		case CRUSH_BUCKET_STRAW:
			err = crush_decode_straw_bucket(p, end,
				(struct crush_bucket_straw *)b);
			break;
		case CRUSH_BUCKET_STRAW2:
			err = crush_decode_straw2_bucket(p, end,
				(struct crush_bucket_straw2 *)b);
			break;
		default:
			/* not reachable: b->alg == alg was validated above */
			err = -EINVAL;
			break;
		}
		if (err < 0)
			goto fail;
	}

	/* rules */
	dout("rule vec is %p\n", c->rules);
	for (i = 0; i < c->max_rules; i++) {
		u32 yes;
		struct crush_rule *r;

		ceph_decode_32_safe(p, end, yes, bad);
		if (!yes) {
			dout("crush_decode NO rule %d off %x %p to %p\n",
			     i, (int)(*p-start), *p, end);
			c->rules[i] = NULL;
			continue;
		}

		dout("crush_decode rule %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		/* len */
		ceph_decode_32_safe(p, end, yes, bad);
#if BITS_PER_LONG == 32
		/* guard the allocation size computation against overflow */
		if (yes > (ULONG_MAX - sizeof(*r))
			  / sizeof(struct crush_rule_step))
			goto bad;
#endif
		r = c->rules[i] = kmalloc(sizeof(*r) +
					  yes*sizeof(struct crush_rule_step),
					  GFP_NOFS);
		if (r == NULL)
			goto badmem;
		dout(" rule %d is at %p\n", i, r);
		r->len = yes;
		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
		for (j = 0; j < r->len; j++) {
			r->steps[j].op = ceph_decode_32(p);
			r->steps[j].arg1 = ceph_decode_32(p);
			r->steps[j].arg2 = ceph_decode_32(p);
		}
	}

	/* ignore trailing name maps. */
	for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
		err = skip_name_map(p, end);
		if (err < 0)
			goto done;
	}

	/* tunables */
	ceph_decode_need(p, end, 3*sizeof(u32), done);
	c->choose_local_tries = ceph_decode_32(p);
	c->choose_local_fallback_tries = ceph_decode_32(p);
	c->choose_total_tries = ceph_decode_32(p);
	dout("crush decode tunable choose_local_tries = %d\n",
	     c->choose_local_tries);
	dout("crush decode tunable choose_local_fallback_tries = %d\n",
	     c->choose_local_fallback_tries);
	dout("crush decode tunable choose_total_tries = %d\n",
	     c->choose_total_tries);

	ceph_decode_need(p, end, sizeof(u32), done);
	c->chooseleaf_descend_once = ceph_decode_32(p);
	dout("crush decode tunable chooseleaf_descend_once = %d\n",
	     c->chooseleaf_descend_once);

	ceph_decode_need(p, end, sizeof(u8), done);
	c->chooseleaf_vary_r = ceph_decode_8(p);
	dout("crush decode tunable chooseleaf_vary_r = %d\n",
	     c->chooseleaf_vary_r);

	/* skip straw_calc_version, allowed_bucket_algs */
	ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
	*p += sizeof(u8) + sizeof(u32);

	ceph_decode_need(p, end, sizeof(u8), done);
	c->chooseleaf_stable = ceph_decode_8(p);
	dout("crush decode tunable chooseleaf_stable = %d\n",
	     c->chooseleaf_stable);

done:
	crush_finalize(c);
	dout("crush_decode success\n");
	return c;

badmem:
	err = -ENOMEM;
	goto fail;

bad:
	/*
	 * Always set the error code here: err may hold 0 from an earlier
	 * successful helper call, and ERR_PTR(0) would be returned as a
	 * NULL "success" to the caller.
	 */
	err = -EINVAL;
fail:
	dout("crush_decode fail %d\n", err);
	crush_destroy(c);
	return ERR_PTR(err);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 1157 | 85.51% | 5 | 38.46% |
Ilya Dryomov | 149 | 11.01% | 5 | 38.46% |
Jim Schutt | 37 | 2.73% | 1 | 7.69% |
Xi Wang | 8 | 0.59% | 1 | 7.69% |
Eric Dumazet | 2 | 0.15% | 1 | 7.69% |
Total | 1353 | 100.00% | 13 | 100.00% |
/*
 * Three-way comparison of two placement group ids, ordering first by
 * pool and then by seed.
 *
 * Returns -1, 0 or 1 when @lhs sorts before, equal to, or after @rhs.
 */
int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
{
	if (lhs->pool != rhs->pool)
		return lhs->pool < rhs->pool ? -1 : 1;

	if (lhs->seed != rhs->seed)
		return lhs->seed < rhs->seed ? -1 : 1;

	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 51 | 68.92% | 3 | 75.00% |
Ilya Dryomov | 23 | 31.08% | 1 | 25.00% |
Total | 74 | 100.00% | 4 | 100.00% |
/*
* rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
* to a set of osds) and primary_temp (explicit primary setting)
*/
/*
 * Insert @new into the pg_mapping rbtree @root, keyed by pgid as
 * ordered by ceph_pg_compare().
 *
 * Returns 0 on success, or -EEXIST (tree unchanged) if an entry with
 * the same pgid is already present.
 */
static int __insert_pg_mapping(struct ceph_pg_mapping *new,
			       struct rb_root *root)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;

	dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);

	/* walk down to the empty slot where the new node belongs */
	while (*link) {
		struct ceph_pg_mapping *cur;
		int cmp;

		parent = *link;
		cur = rb_entry(parent, struct ceph_pg_mapping, node);
		cmp = ceph_pg_compare(&new->pgid, &cur->pgid);
		if (cmp == 0)
			return -EEXIST;

		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 160 | 98.16% | 2 | 66.67% |
Ilya Dryomov | 3 | 1.84% | 1 | 33.33% |
Total | 163 | 100.00% | 3 | 100.00% |
/*
 * Find the pg_mapping for @pgid in the rbtree @root.
 *
 * Returns the matching entry, or NULL if there is none.
 */
static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
						   struct ceph_pg pgid)
{
	struct rb_node *cur = root->rb_node;

	while (cur) {
		struct ceph_pg_mapping *pg =
			rb_entry(cur, struct ceph_pg_mapping, node);
		int cmp = ceph_pg_compare(&pgid, &pg->pgid);

		if (cmp == 0) {
			dout("__lookup_pg_mapping %lld.%x got %p\n",
			     pgid.pool, pgid.seed, pg);
			return pg;
		}

		cur = cmp < 0 ? cur->rb_left : cur->rb_right;
	}

	return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 115 | 97.46% | 4 | 80.00% |
Ilya Dryomov | 3 | 2.54% | 1 | 20.00% |
Total | 118 | 100.00% | 5 | 100.00% |
/*
 * Remove and free the pg_mapping for @pgid from the rbtree @root.
 *
 * Returns 0 if an entry was removed, -ENOENT if none was found.
 */
static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
{
	struct ceph_pg_mapping *pg;

	pg = __lookup_pg_mapping(root, pgid);
	if (!pg) {
		dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool,
		     pgid.seed);
		return -ENOENT;
	}

	dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
	     pg);
	rb_erase(&pg->node, root);
	kfree(pg);
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 83 | 100.00% | 2 | 100.00% |
Total | 83 | 100.00% | 2 | 100.00% |
/*
* rbtree of pg pool info
*/
/*
 * Insert @new into the pool rbtree @root, keyed by pool id.
 *
 * Returns 0 on success, or -EEXIST (tree unchanged) if a pool with
 * the same id is already present.
 */
static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;

	/* walk down to the empty slot where the new node belongs */
	while (*link) {
		struct ceph_pg_pool_info *cur;

		parent = *link;
		cur = rb_entry(parent, struct ceph_pg_pool_info, node);
		if (new->id == cur->id)
			return -EEXIST;

		if (new->id < cur->id)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 136 | 100.00% | 4 | 100.00% |
Total | 136 | 100.00% | 4 | 100.00% |
/*
 * Find the pool with the given @id in the pool rbtree @root.
 *
 * Returns the pool info, or NULL if no pool has that id.
 */
static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
{
	struct rb_node *cur = root->rb_node;

	while (cur) {
		struct ceph_pg_pool_info *pi =
			rb_entry(cur, struct ceph_pg_pool_info, node);

		if (id < pi->id) {
			cur = cur->rb_left;
			continue;
		}
		if (id > pi->id) {
			cur = cur->rb_right;
			continue;
		}
		return pi;
	}

	return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Sage Weil | 84 | 100.00% | 2 | 100.00% |
Total | 84 | 100.00% | 2 | 100.00% |
/*
 * Look up a pool by id in @map's pg_pools tree.
 *
 * Returns the pool info, or NULL if @map has no pool with that id.
 */
struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{
return __lookup_pg_pool(&map->pg_pools, id);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilya Dryomov | 26 | 100.00% | 1 | 100.00% |
Total | 26 | 100.00% | 1 | 100.00% |
/*
 * Return the name of the pool with the given @id in @map.
 *
 * Returns NULL if @id is CEPH_NOPOOL, if @id exceeds INT_MAX (which
 * also triggers a one-time warning), or if no such pool exists.
 */
const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{
	struct ceph_pg_pool_info *pi;

	if (id == CEPH_NOPOOL || WARN_ON_ONCE(id > (u64) INT_MAX))
		return NULL;

	pi = __lookup_pg_pool(&map->pg_pools, (int) id);
	if (!pi)
		return NULL;

	return pi->name;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Alex Elder | 68 | 100.00% | 1 | 100.00% |
Total | 68 | 100.00% | 1 | 100.00% |
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
{
struct rb_node *rbp;
for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
struct ceph_pg_pool_info *pi =
rb_entry(rbp, struct ceph_pg_pool_info, node);
if (pi->name && strcmp(pi->name, name) == 0)
return