| .. | .. |
| 6 | 6 | * Copyright 2012 Google, Inc. |
| 7 | 7 | */ |
| 8 | 8 | |
| 9 | | -#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ |
| 9 | +#define pr_fmt(fmt) "bcache: %s() " fmt, __func__ |
| 10 | 10 | |
| 11 | 11 | #include "util.h" |
| 12 | 12 | #include "bset.h" |
| .. | .. |
| 31 | 31 | if (b->ops->key_dump) |
| 32 | 32 | b->ops->key_dump(b, k); |
| 33 | 33 | else |
| 34 | | - pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); |
| 34 | + pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); |
| 35 | 35 | |
| 36 | 36 | if (next < bset_bkey_last(i) && |
| 37 | 37 | bkey_cmp(k, b->ops->is_extents ? |
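
The two hunks above drop the trailing "\n" from bcache's pr_fmt() prefix and switch the fallback key dump from pr_err() to pr_cont(), so the key is printed as a continuation of a log line presumably opened just before this hunk, and every complete message now supplies its own newline. The userspace sketch below illustrates how such a prefix macro composes with a continuation call; demo_printk(), demo_cont() and dump_key() are invented stand-ins for illustration, and only the pr_fmt() definition is taken from the patch.

```c
#include <stdio.h>

/* Prefix macro as in the patch: it injects __func__ as the first
 * printf argument and no longer appends "\n" itself. */
#define pr_fmt(fmt) "bcache: %s() " fmt, __func__

/* Hypothetical stand-ins for printk()/pr_cont(), for illustration only. */
#define demo_printk(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)
#define demo_cont(fmt, ...)   printf(fmt, ##__VA_ARGS__)

static void dump_key(unsigned long long inode, unsigned long long offset)
{
	demo_printk("bad key ");                  /* prefixed, line left open */
	demo_cont("%llu:%llu\n", inode, offset);  /* continues the same line */
}

int main(void)
{
	dump_key(1, 4096);   /* prints: bcache: dump_key() bad key 1:4096 */
	return 0;
}
```
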
| .. | .. |
| 155 | 155 | return 0; |
| 156 | 156 | } |
| 157 | 157 | |
| 158 | +/* Pop the top key of keylist by pointing l->top to its previous key */ |
| 158 | 159 | struct bkey *bch_keylist_pop(struct keylist *l) |
| 159 | 160 | { |
| 160 | 161 | struct bkey *k = l->keys; |
| .. | .. |
| 168 | 169 | return l->top = k; |
| 169 | 170 | } |
| 170 | 171 | |
| 172 | +/* Pop the bottom key of keylist and update l->top_p */ |
| 171 | 173 | void bch_keylist_pop_front(struct keylist *l) |
| 172 | 174 | { |
| 173 | 175 | l->top_p -= bkey_u64s(l->keys); |
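
The two new one-line comments describe a keylist as variable-size keys packed back to back between l->keys and l->top: popping the top key means walking forward from l->keys until the key just before l->top is found, then retreating l->top onto it. The sketch below is a simplified userspace rendering of that walk, using invented demo_* names and a toy key encoding rather than the real struct bkey and struct keylist definitions.

```c
#include <stdint.h>
#include <stdio.h>

/* Toy key: one header u64 whose low byte is the payload size in u64s. */
struct demo_bkey {
	uint64_t header;
	uint64_t payload[];
};

static inline uint64_t demo_bkey_u64s(struct demo_bkey *k)
{
	return 1 + (k->header & 0xff);
}

static inline struct demo_bkey *demo_bkey_next(struct demo_bkey *k)
{
	return (struct demo_bkey *)((uint64_t *)k + demo_bkey_u64s(k));
}

/* Keys are packed back to back between ->keys and ->top (exclusive). */
struct demo_keylist {
	struct demo_bkey *keys;
	struct demo_bkey *top;
};

/* Walk from ->keys to the key just before ->top, then retreat ->top to it. */
static struct demo_bkey *demo_keylist_pop(struct demo_keylist *l)
{
	struct demo_bkey *k = l->keys;

	if (k == l->top)
		return NULL;            /* empty list */

	while (demo_bkey_next(k) != l->top)
		k = demo_bkey_next(k);

	return l->top = k;              /* popped key, now outside the list */
}

int main(void)
{
	uint64_t buf[16] = { 0 };
	struct demo_keylist l = {
		.keys = (struct demo_bkey *)buf,
		.top  = (struct demo_bkey *)buf,
	};

	/* Append two keys with payloads of 2 and 3 u64s. */
	l.top->header = 2;
	l.top = demo_bkey_next(l.top);
	l.top->header = 3;
	l.top = demo_bkey_next(l.top);

	/* The list is non-empty, so the pop below cannot return NULL. */
	printf("popped key spans %llu u64s\n",
	       (unsigned long long)demo_bkey_u64s(demo_keylist_pop(&l)));
	return 0;
}
```
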
| .. | .. |
| 309 | 311 | t->tree = NULL; |
| 310 | 312 | t->data = NULL; |
| 311 | 313 | } |
| 312 | | -EXPORT_SYMBOL(bch_btree_keys_free); |
| 313 | 314 | |
| 314 | 315 | int bch_btree_keys_alloc(struct btree_keys *b, |
| 315 | 316 | unsigned int page_order, |
| .. | .. |
| 342 | 343 | bch_btree_keys_free(b); |
| 343 | 344 | return -ENOMEM; |
| 344 | 345 | } |
| 345 | | -EXPORT_SYMBOL(bch_btree_keys_alloc); |
| 346 | 346 | |
| 347 | 347 | void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, |
| 348 | 348 | bool *expensive_debug_checks) |
| 349 | 349 | { |
| 350 | | - unsigned int i; |
| 351 | | - |
| 352 | 350 | b->ops = ops; |
| 353 | 351 | b->expensive_debug_checks = expensive_debug_checks; |
| 354 | 352 | b->nsets = 0; |
| 355 | 353 | b->last_set_unwritten = 0; |
| 356 | 354 | |
| 357 | | - /* XXX: shouldn't be needed */ |
| 358 | | - for (i = 0; i < MAX_BSETS; i++) |
| 359 | | - b->set[i].size = 0; |
| 360 | 355 | /* |
| 361 | | - * Second loop starts at 1 because b->keys[0]->data is the memory we |
| 362 | | - * allocated |
| 356 | + * struct btree_keys is embedded in struct btree, and struct |
| 357 | + * bset_tree is embedded in struct btree_keys. They are all |
| 358 | + * initialized to 0 by kzalloc() in mca_bucket_alloc(), and |
| 359 | + * b->set[0].data is allocated in bch_btree_keys_alloc(), so we |
| 360 | + * don't have to initialize b->set[].size and b->set[].data here |
| 361 | + * any more. |
| 363 | 362 | */ |
| 364 | | - for (i = 1; i < MAX_BSETS; i++) |
| 365 | | - b->set[i].data = NULL; |
| 366 | 363 | } |
| 367 | | -EXPORT_SYMBOL(bch_btree_keys_init); |
| 368 | 364 | |
| 369 | 365 | /* Binary tree stuff for auxiliary search trees */ |
| 370 | 366 | |
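
The replacement comment argues that the zeroing loops removed from bch_btree_keys_init() were redundant: struct btree_keys and its embedded bset_tree array live inside struct btree, which mca_bucket_alloc() obtains with kzalloc(), so every set[i].size and set[i].data already starts out as zero. A small userspace sketch of that reasoning follows, with invented demo_* structures and calloc() standing in for kzalloc() (on Linux, all-bits-zero is a valid NULL pointer, which this relies on).

```c
#include <assert.h>
#include <stdlib.h>

#define DEMO_MAX_BSETS 4

struct demo_bset_tree {
	unsigned int size;
	void *data;
};

struct demo_btree_keys {
	/* embedded array, not pointers */
	struct demo_bset_tree set[DEMO_MAX_BSETS];
};

struct demo_btree {
	struct demo_btree_keys keys;   /* embedded in the btree node */
};

int main(void)
{
	/* calloc() plays the role of kzalloc(): the whole node is zero-filled. */
	struct demo_btree *b = calloc(1, sizeof(*b));

	assert(b);
	for (int i = 0; i < DEMO_MAX_BSETS; i++) {
		assert(b->keys.set[i].size == 0);
		assert(b->keys.set[i].data == NULL);
	}
	free(b);
	return 0;
}
```
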
| .. | .. |
| 681 | 677 | |
| 682 | 678 | bch_bset_build_unwritten_tree(b); |
| 683 | 679 | } |
| 684 | | -EXPORT_SYMBOL(bch_bset_init_next); |
| 685 | 680 | |
| 686 | 681 | /* |
| 687 | 682 | * Build auxiliary binary tree 'struct bset_tree *t', this tree is used to |
| .. | .. |
| 735 | 730 | j = inorder_next(j, t->size)) |
| 736 | 731 | make_bfloat(t, j); |
| 737 | 732 | } |
| 738 | | -EXPORT_SYMBOL(bch_bset_build_written_tree); |
| 739 | 733 | |
| 740 | 734 | /* Insert */ |
| 741 | 735 | |
| .. | .. |
| 783 | 777 | j = j * 2 + 1; |
| 784 | 778 | } while (j < t->size); |
| 785 | 779 | } |
| 786 | | -EXPORT_SYMBOL(bch_bset_fix_invalidated_key); |
| 787 | 780 | |
| 788 | 781 | static void bch_bset_fix_lookup_table(struct btree_keys *b, |
| 789 | 782 | struct bset_tree *t, |
| .. | .. |
| 858 | 851 | |
| 859 | 852 | return b->ops->key_merge(b, l, r); |
| 860 | 853 | } |
| 861 | | -EXPORT_SYMBOL(bch_bkey_try_merge); |
| 862 | 854 | |
| 863 | 855 | void bch_bset_insert(struct btree_keys *b, struct bkey *where, |
| 864 | 856 | struct bkey *insert) |
| .. | .. |
| 878 | 870 | bkey_copy(where, insert); |
| 879 | 871 | bch_bset_fix_lookup_table(b, t, where); |
| 880 | 872 | } |
| 881 | | -EXPORT_SYMBOL(bch_bset_insert); |
| 882 | 873 | |
| 883 | 874 | unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, |
| 884 | 875 | struct bkey *replace_key) |
| .. | .. |
| 934 | 925 | merged: |
| 935 | 926 | return status; |
| 936 | 927 | } |
| 937 | | -EXPORT_SYMBOL(bch_btree_insert_key); |
| 938 | 928 | |
| 939 | 929 | /* Lookup */ |
| 940 | 930 | |
| .. | .. |
| 970 | 960 | unsigned int inorder, j, n = 1; |
| 971 | 961 | |
| 972 | 962 | do { |
| 973 | | - /* |
| 974 | | - * A bit trick here. |
| 975 | | - * If p < t->size, (int)(p - t->size) is a minus value and |
| 976 | | - * the most significant bit is set, right shifting 31 bits |
| 977 | | - * gets 1. If p >= t->size, the most significant bit is |
| 978 | | - * not set, right shifting 31 bits gets 0. |
| 979 | | - * So the following 2 lines equals to |
| 980 | | - * if (p >= t->size) |
| 981 | | - * p = 0; |
| 982 | | - * but a branch instruction is avoided. |
| 983 | | - */ |
| 984 | 963 | unsigned int p = n << 4; |
| 985 | 964 | |
| 986 | | - p &= ((int) (p - t->size)) >> 31; |
| 987 | | - |
| 988 | | - prefetch(&t->tree[p]); |
| 965 | + if (p < t->size) |
| 966 | + prefetch(&t->tree[p]); |
| 989 | 967 | |
| 990 | 968 | j = n; |
| 991 | 969 | f = &t->tree[j]; |
| 992 | 970 | |
| 993 | | - /* |
| 994 | | - * Similar bit trick, use subtract operation to avoid a branch |
| 995 | | - * instruction. |
| 996 | | - * |
| 997 | | - * n = (f->mantissa > bfloat_mantissa()) |
| 998 | | - * ? j * 2 |
| 999 | | - * : j * 2 + 1; |
| 1000 | | - * |
| 1001 | | - * We need to subtract 1 from f->mantissa for the sign bit trick |
| 1002 | | - * to work - that's done in make_bfloat() |
| 1003 | | - */ |
| 1004 | | - if (likely(f->exponent != 127)) |
| 1005 | | - n = j * 2 + (((unsigned int) |
| 1006 | | - (f->mantissa - |
| 1007 | | - bfloat_mantissa(search, f))) >> 31); |
| 1008 | | - else |
| 1009 | | - n = (bkey_cmp(tree_to_bkey(t, j), search) > 0) |
| 1010 | | - ? j * 2 |
| 1011 | | - : j * 2 + 1; |
| 971 | + if (likely(f->exponent != 127)) { |
| 972 | + if (f->mantissa >= bfloat_mantissa(search, f)) |
| 973 | + n = j * 2; |
| 974 | + else |
| 975 | + n = j * 2 + 1; |
| 976 | + } else { |
| 977 | + if (bkey_cmp(tree_to_bkey(t, j), search) > 0) |
| 978 | + n = j * 2; |
| 979 | + else |
| 980 | + n = j * 2 + 1; |
| 981 | + } |
| 1012 | 982 | } while (n < t->size); |
| 1013 | 983 | |
| 1014 | 984 | inorder = to_inorder(j, t); |
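
The hunk above replaces two sign-bit tricks with plain branches: the prefetch index is no longer clamped with p &= ((int)(p - t->size)) >> 31, and the mantissa comparison no longer encodes its result in the high bit of an unsigned subtraction. The standalone userspace check below (not bcache code) shows why the clamp form and the explicit branch agree; it depends on arithmetic right shift of a negative int, which gcc and clang provide but the C standard leaves implementation-defined, which is part of why the explicit branch is easier to trust.

```c
#include <assert.h>
#include <stdio.h>

/* Branchless form removed by the patch: keep p when p < size, else 0.
 * (p - size) goes negative exactly when p < size; the arithmetic right
 * shift smears the sign bit into an all-ones or all-zero mask. */
static unsigned int clamp_branchless(unsigned int p, unsigned int size)
{
	p &= ((int)(p - size)) >> 31;
	return p;
}

/* Explicit form used after the patch. */
static unsigned int clamp_branchy(unsigned int p, unsigned int size)
{
	return p < size ? p : 0;
}

int main(void)
{
	for (unsigned int size = 1; size < 64; size++)
		for (unsigned int p = 0; p < 128; p++)
			assert(clamp_branchless(p, size) ==
			       clamp_branchy(p, size));

	printf("branchless clamp and explicit branch agree\n");
	return 0;
}
```
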
| .. | .. |
| 1100 | 1070 | |
| 1101 | 1071 | return i.l; |
| 1102 | 1072 | } |
| 1103 | | -EXPORT_SYMBOL(__bch_bset_search); |
| 1104 | 1073 | |
| 1105 | 1074 | /* Btree iterator */ |
| 1106 | 1075 | |
| .. | .. |
| 1155 | 1124 | { |
| 1156 | 1125 | return __bch_btree_iter_init(b, iter, search, b->set); |
| 1157 | 1126 | } |
| 1158 | | -EXPORT_SYMBOL(bch_btree_iter_init); |
| 1159 | 1127 | |
| 1160 | 1128 | static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, |
| 1161 | 1129 | btree_iter_cmp_fn *cmp) |
| .. | .. |
| 1188 | 1156 | return __bch_btree_iter_next(iter, btree_iter_cmp); |
| 1189 | 1157 | |
| 1190 | 1158 | } |
| 1191 | | -EXPORT_SYMBOL(bch_btree_iter_next); |
| 1192 | 1159 | |
| 1193 | 1160 | struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, |
| 1194 | 1161 | struct btree_keys *b, ptr_filter_fn fn) |
| .. | .. |
| 1219 | 1186 | |
| 1220 | 1187 | return mempool_init_page_pool(&state->pool, 1, page_order); |
| 1221 | 1188 | } |
| 1222 | | -EXPORT_SYMBOL(bch_bset_sort_state_init); |
| 1223 | 1189 | |
| 1224 | 1190 | static void btree_mergesort(struct btree_keys *b, struct bset *out, |
| 1225 | 1191 | struct btree_iter *iter, |
| .. | .. |
| 1259 | 1225 | |
| 1260 | 1226 | out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; |
| 1261 | 1227 | |
| 1262 | | - pr_debug("sorted %i keys", out->keys); |
| 1228 | + pr_debug("sorted %i keys\n", out->keys); |
| 1263 | 1229 | } |
| 1264 | 1230 | |
| 1265 | 1231 | static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, |
| .. | .. |
| 1291 | 1257 | * Our temporary buffer is the same size as the btree node's |
| 1292 | 1258 | * buffer, we can just swap buffers instead of doing a big |
| 1293 | 1259 | * memcpy() |
| 1260 | + * |
| 1261 | + * Don't worry even if 'out' is allocated from the mempool, it |
| 1262 | + * can still be swapped here, because state->pool is a page |
| 1263 | + * mempool created by mempool_init_page_pool(), which allocates |
| 1264 | + * pages with alloc_pages(). |
| 1294 | 1265 | */ |
| 1295 | 1266 | |
| 1296 | 1267 | out->magic = b->set->data->magic; |
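
The corrected comment above explains why swapping the sort buffer with the node's buffer is safe: state->pool is created by mempool_init_page_pool() (see the bch_bset_sort_state_init() hunk earlier), so its elements are ordinary pages from alloc_pages(). Below is a hedged kernel-style sketch of that pattern; demo_sort_swap() and its parameters are invented, while mempool_alloc(), mempool_free(), page_address() and virt_to_page() are real kernel APIs.

```c
/*
 * Sketch only, not bcache code: a buffer taken from a page mempool and a
 * caller-owned page-backed buffer of the same order are interchangeable,
 * so they can be swapped and whichever one ends up unused can be returned
 * to the pool with mempool_free(virt_to_page(...)).
 */
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mempool.h>
#include <linux/mm.h>

static int demo_sort_swap(mempool_t *pool, void **node_buf)
{
	struct page *page = mempool_alloc(pool, GFP_NOIO);
	void *out, *old;

	if (!page)
		return -ENOMEM;
	out = page_address(page);

	/* ... write the sorted keys into 'out' instead of *node_buf ... */

	/* Hand the pool's page to the node, retire the node's old buffer. */
	old = *node_buf;
	*node_buf = out;
	mempool_free(virt_to_page(old), pool);

	return 0;
}
```
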
| .. | .. |
| 1336 | 1307 | |
| 1337 | 1308 | EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); |
| 1338 | 1309 | } |
| 1339 | | -EXPORT_SYMBOL(bch_btree_sort_partial); |
| 1340 | 1310 | |
| 1341 | 1311 | void bch_btree_sort_and_fix_extents(struct btree_keys *b, |
| 1342 | 1312 | struct btree_iter *iter, |
| .. | .. |
| 1389 | 1359 | out: |
| 1390 | 1360 | bch_bset_build_written_tree(b); |
| 1391 | 1361 | } |
| 1392 | | -EXPORT_SYMBOL(bch_btree_sort_lazy); |
| 1393 | 1362 | |
| 1394 | 1363 | void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats) |
| 1395 | 1364 | { |