.. | .. |
---|
6 | 6 | * Copyright 2012 Google, Inc. |
---|
7 | 7 | */ |
---|
8 | 8 | |
---|
9 | | -#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ |
---|
| 9 | +#define pr_fmt(fmt) "bcache: %s() " fmt, __func__ |
---|
10 | 10 | |
---|
11 | 11 | #include "util.h" |
---|
12 | 12 | #include "bset.h" |
---|
.. | .. |
---|
31 | 31 | if (b->ops->key_dump) |
---|
32 | 32 | b->ops->key_dump(b, k); |
---|
33 | 33 | else |
---|
34 | | - pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); |
---|
| 34 | + pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); |
---|
35 | 35 | |
---|
36 | 36 | if (next < bset_bkey_last(i) && |
---|
37 | 37 | bkey_cmp(k, b->ops->is_extents ? |
---|
.. | .. |
---|
155 | 155 | return 0; |
---|
156 | 156 | } |
---|
157 | 157 | |
---|
| 158 | +/* Pop the top key of keylist by pointing l->top to its previous key */ |
---|
158 | 159 | struct bkey *bch_keylist_pop(struct keylist *l) |
---|
159 | 160 | { |
---|
160 | 161 | struct bkey *k = l->keys; |
---|
.. | .. |
---|
168 | 169 | return l->top = k; |
---|
169 | 170 | } |
---|
170 | 171 | |
---|
| 172 | +/* Pop the bottom key of keylist and update l->top_p */ |
---|
171 | 173 | void bch_keylist_pop_front(struct keylist *l) |
---|
172 | 174 | { |
---|
173 | 175 | l->top_p -= bkey_u64s(l->keys); |
---|
.. | .. |
---|
309 | 311 | t->tree = NULL; |
---|
310 | 312 | t->data = NULL; |
---|
311 | 313 | } |
---|
312 | | -EXPORT_SYMBOL(bch_btree_keys_free); |
---|
313 | 314 | |
---|
314 | 315 | int bch_btree_keys_alloc(struct btree_keys *b, |
---|
315 | 316 | unsigned int page_order, |
---|
.. | .. |
---|
342 | 343 | bch_btree_keys_free(b); |
---|
343 | 344 | return -ENOMEM; |
---|
344 | 345 | } |
---|
345 | | -EXPORT_SYMBOL(bch_btree_keys_alloc); |
---|
346 | 346 | |
---|
347 | 347 | void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, |
---|
348 | 348 | bool *expensive_debug_checks) |
---|
349 | 349 | { |
---|
350 | | - unsigned int i; |
---|
351 | | - |
---|
352 | 350 | b->ops = ops; |
---|
353 | 351 | b->expensive_debug_checks = expensive_debug_checks; |
---|
354 | 352 | b->nsets = 0; |
---|
355 | 353 | b->last_set_unwritten = 0; |
---|
356 | 354 | |
---|
357 | | - /* XXX: shouldn't be needed */ |
---|
358 | | - for (i = 0; i < MAX_BSETS; i++) |
---|
359 | | - b->set[i].size = 0; |
---|
360 | 355 | /* |
---|
361 | | - * Second loop starts at 1 because b->keys[0]->data is the memory we |
---|
362 | | - * allocated |
---|
| 356 | + * struct btree_keys is embedded in struct btree, and struct |
---|
| 357 | + * bset_tree is embedded into struct btree_keys. They are all |
---|
| 358 | + * initialized as 0 by kzalloc() in mca_bucket_alloc(), and |
---|
| 359 | + * b->set[0].data is allocated in bch_btree_keys_alloc(), so we |
---|
| 360 | + * don't have to initialize b->set[].size and b->set[].data |
---|
| 361 | + * here any more. |
---|
363 | 362 | */ |
---|
364 | | - for (i = 1; i < MAX_BSETS; i++) |
---|
365 | | - b->set[i].data = NULL; |
---|
366 | 363 | } |
---|
367 | | -EXPORT_SYMBOL(bch_btree_keys_init); |
---|
368 | 364 | |
---|
369 | 365 | /* Binary tree stuff for auxiliary search trees */ |
---|
370 | 366 | |
---|
.. | .. |
---|
681 | 677 | |
---|
682 | 678 | bch_bset_build_unwritten_tree(b); |
---|
683 | 679 | } |
---|
684 | | -EXPORT_SYMBOL(bch_bset_init_next); |
---|
685 | 680 | |
---|
686 | 681 | /* |
---|
687 | 682 | * Build auxiliary binary tree 'struct bset_tree *t', this tree is used to |
---|
.. | .. |
---|
735 | 730 | j = inorder_next(j, t->size)) |
---|
736 | 731 | make_bfloat(t, j); |
---|
737 | 732 | } |
---|
738 | | -EXPORT_SYMBOL(bch_bset_build_written_tree); |
---|
739 | 733 | |
---|
740 | 734 | /* Insert */ |
---|
741 | 735 | |
---|
.. | .. |
---|
783 | 777 | j = j * 2 + 1; |
---|
784 | 778 | } while (j < t->size); |
---|
785 | 779 | } |
---|
786 | | -EXPORT_SYMBOL(bch_bset_fix_invalidated_key); |
---|
787 | 780 | |
---|
788 | 781 | static void bch_bset_fix_lookup_table(struct btree_keys *b, |
---|
789 | 782 | struct bset_tree *t, |
---|
.. | .. |
---|
858 | 851 | |
---|
859 | 852 | return b->ops->key_merge(b, l, r); |
---|
860 | 853 | } |
---|
861 | | -EXPORT_SYMBOL(bch_bkey_try_merge); |
---|
862 | 854 | |
---|
863 | 855 | void bch_bset_insert(struct btree_keys *b, struct bkey *where, |
---|
864 | 856 | struct bkey *insert) |
---|
.. | .. |
---|
878 | 870 | bkey_copy(where, insert); |
---|
879 | 871 | bch_bset_fix_lookup_table(b, t, where); |
---|
880 | 872 | } |
---|
881 | | -EXPORT_SYMBOL(bch_bset_insert); |
---|
882 | 873 | |
---|
883 | 874 | unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, |
---|
884 | 875 | struct bkey *replace_key) |
---|
.. | .. |
---|
934 | 925 | merged: |
---|
935 | 926 | return status; |
---|
936 | 927 | } |
---|
937 | | -EXPORT_SYMBOL(bch_btree_insert_key); |
---|
938 | 928 | |
---|
939 | 929 | /* Lookup */ |
---|
940 | 930 | |
---|
.. | .. |
---|
970 | 960 | unsigned int inorder, j, n = 1; |
---|
971 | 961 | |
---|
972 | 962 | do { |
---|
973 | | - /* |
---|
974 | | - * A bit trick here. |
---|
975 | | - * If p < t->size, (int)(p - t->size) is a minus value and |
---|
976 | | - * the most significant bit is set, right shifting 31 bits |
---|
977 | | - * gets 1. If p >= t->size, the most significant bit is |
---|
978 | | - * not set, right shifting 31 bits gets 0. |
---|
979 | | - * So the following 2 lines equals to |
---|
980 | | - * if (p >= t->size) |
---|
981 | | - * p = 0; |
---|
982 | | - * but a branch instruction is avoided. |
---|
983 | | - */ |
---|
984 | 963 | unsigned int p = n << 4; |
---|
985 | 964 | |
---|
986 | | - p &= ((int) (p - t->size)) >> 31; |
---|
987 | | - |
---|
988 | | - prefetch(&t->tree[p]); |
---|
| 965 | + if (p < t->size) |
---|
| 966 | + prefetch(&t->tree[p]); |
---|
989 | 967 | |
---|
990 | 968 | j = n; |
---|
991 | 969 | f = &t->tree[j]; |
---|
992 | 970 | |
---|
993 | | - /* |
---|
994 | | - * Similar bit trick, use subtract operation to avoid a branch |
---|
995 | | - * instruction. |
---|
996 | | - * |
---|
997 | | - * n = (f->mantissa > bfloat_mantissa()) |
---|
998 | | - * ? j * 2 |
---|
999 | | - * : j * 2 + 1; |
---|
1000 | | - * |
---|
1001 | | - * We need to subtract 1 from f->mantissa for the sign bit trick |
---|
1002 | | - * to work - that's done in make_bfloat() |
---|
1003 | | - */ |
---|
1004 | | - if (likely(f->exponent != 127)) |
---|
1005 | | - n = j * 2 + (((unsigned int) |
---|
1006 | | - (f->mantissa - |
---|
1007 | | - bfloat_mantissa(search, f))) >> 31); |
---|
1008 | | - else |
---|
1009 | | - n = (bkey_cmp(tree_to_bkey(t, j), search) > 0) |
---|
1010 | | - ? j * 2 |
---|
1011 | | - : j * 2 + 1; |
---|
| 971 | + if (likely(f->exponent != 127)) { |
---|
| 972 | + if (f->mantissa >= bfloat_mantissa(search, f)) |
---|
| 973 | + n = j * 2; |
---|
| 974 | + else |
---|
| 975 | + n = j * 2 + 1; |
---|
| 976 | + } else { |
---|
| 977 | + if (bkey_cmp(tree_to_bkey(t, j), search) > 0) |
---|
| 978 | + n = j * 2; |
---|
| 979 | + else |
---|
| 980 | + n = j * 2 + 1; |
---|
| 981 | + } |
---|
1012 | 982 | } while (n < t->size); |
---|
1013 | 983 | |
---|
1014 | 984 | inorder = to_inorder(j, t); |
---|
.. | .. |
---|
1100 | 1070 | |
---|
1101 | 1071 | return i.l; |
---|
1102 | 1072 | } |
---|
1103 | | -EXPORT_SYMBOL(__bch_bset_search); |
---|
1104 | 1073 | |
---|
1105 | 1074 | /* Btree iterator */ |
---|
1106 | 1075 | |
---|
.. | .. |
---|
1155 | 1124 | { |
---|
1156 | 1125 | return __bch_btree_iter_init(b, iter, search, b->set); |
---|
1157 | 1126 | } |
---|
1158 | | -EXPORT_SYMBOL(bch_btree_iter_init); |
---|
1159 | 1127 | |
---|
1160 | 1128 | static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, |
---|
1161 | 1129 | btree_iter_cmp_fn *cmp) |
---|
.. | .. |
---|
1188 | 1156 | return __bch_btree_iter_next(iter, btree_iter_cmp); |
---|
1189 | 1157 | |
---|
1190 | 1158 | } |
---|
1191 | | -EXPORT_SYMBOL(bch_btree_iter_next); |
---|
1192 | 1159 | |
---|
1193 | 1160 | struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, |
---|
1194 | 1161 | struct btree_keys *b, ptr_filter_fn fn) |
---|
.. | .. |
---|
1219 | 1186 | |
---|
1220 | 1187 | return mempool_init_page_pool(&state->pool, 1, page_order); |
---|
1221 | 1188 | } |
---|
1222 | | -EXPORT_SYMBOL(bch_bset_sort_state_init); |
---|
1223 | 1189 | |
---|
1224 | 1190 | static void btree_mergesort(struct btree_keys *b, struct bset *out, |
---|
1225 | 1191 | struct btree_iter *iter, |
---|
.. | .. |
---|
1259 | 1225 | |
---|
1260 | 1226 | out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; |
---|
1261 | 1227 | |
---|
1262 | | - pr_debug("sorted %i keys", out->keys); |
---|
| 1228 | + pr_debug("sorted %i keys\n", out->keys); |
---|
1263 | 1229 | } |
---|
1264 | 1230 | |
---|
1265 | 1231 | static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, |
---|
.. | .. |
---|
1291 | 1257 | * Our temporary buffer is the same size as the btree node's |
---|
1292 | 1258 | * buffer, we can just swap buffers instead of doing a big |
---|
1293 | 1259 | * memcpy() |
---|
| 1260 | + * |
---|
| 1261 | + * Don't worry even if 'out' is allocated from mempool, it |
---|
| 1262 | + * can still be swapped here, because state->pool is a page |
---|
| 1263 | + * mempool created by mempool_init_page_pool(), which indeed |
---|
| 1264 | + * allocates pages by alloc_pages(). |
---|
1294 | 1265 | */ |
---|
1295 | 1266 | |
---|
1296 | 1267 | out->magic = b->set->data->magic; |
---|
.. | .. |
---|
1336 | 1307 | |
---|
1337 | 1308 | EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); |
---|
1338 | 1309 | } |
---|
1339 | | -EXPORT_SYMBOL(bch_btree_sort_partial); |
---|
1340 | 1310 | |
---|
1341 | 1311 | void bch_btree_sort_and_fix_extents(struct btree_keys *b, |
---|
1342 | 1312 | struct btree_iter *iter, |
---|
.. | .. |
---|
1389 | 1359 | out: |
---|
1390 | 1360 | bch_bset_build_written_tree(b); |
---|
1391 | 1361 | } |
---|
1392 | | -EXPORT_SYMBOL(bch_btree_sort_lazy); |
---|
1393 | 1362 | |
---|
1394 | 1363 | void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats) |
---|
1395 | 1364 | { |
---|