From a36159eec6ca17402b0e146b86efaf76568dc353 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Fri, 20 Sep 2024 01:41:23 +0000 Subject: [PATCH] 重命名 AX88772C_eeprom/asix.c 为 asix_mac.c --- kernel/drivers/md/bcache/btree.c | 502 +++++++++++++++++++++++++++++++++++-------------------- 1 files changed, 321 insertions(+), 181 deletions(-) diff --git a/kernel/drivers/md/bcache/btree.c b/kernel/drivers/md/bcache/btree.c index e388e7b..24c57bb 100644 --- a/kernel/drivers/md/bcache/btree.c +++ b/kernel/drivers/md/bcache/btree.c @@ -99,70 +99,14 @@ #define PTR_HASH(c, k) \ (((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0)) +static struct workqueue_struct *btree_io_wq; + #define insert_lock(s, b) ((b)->level <= (s)->lock) -/* - * These macros are for recursing down the btree - they handle the details of - * locking and looking up nodes in the cache for you. They're best treated as - * mere syntax when reading code that uses them. - * - * op->lock determines whether we take a read or a write lock at a given depth. - * If you've got a read lock and find that you need a write lock (i.e. you're - * going to have to split), set op->lock and return -EINTR; btree_root() will - * call you again and you'll have the correct lock. - */ - -/** - * btree - recurse down the btree on a specified key - * @fn: function to call, which will be passed the child node - * @key: key to recurse on - * @b: parent btree node - * @op: pointer to struct btree_op - */ -#define btree(fn, key, b, op, ...) \ -({ \ - int _r, l = (b)->level - 1; \ - bool _w = l <= (op)->lock; \ - struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \ - _w, b); \ - if (!IS_ERR(_child)) { \ - _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ - rw_unlock(_w, _child); \ - } else \ - _r = PTR_ERR(_child); \ - _r; \ -}) - -/** - * btree_root - call a function on the root of the btree - * @fn: function to call, which will be passed the child node - * @c: cache set - * @op: pointer to struct btree_op - */ -#define btree_root(fn, c, op, ...) \ -({ \ - int _r = -EINTR; \ - do { \ - struct btree *_b = (c)->root; \ - bool _w = insert_lock(op, _b); \ - rw_lock(_w, _b, _b->level); \ - if (_b == (c)->root && \ - _w == insert_lock(op, _b)) { \ - _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ - } \ - rw_unlock(_w, _b); \ - bch_cannibalize_unlock(c); \ - if (_r == -EINTR) \ - schedule(); \ - } while (_r == -EINTR); \ - \ - finish_wait(&(c)->btree_cache_wait, &(op)->wait); \ - _r; \ -}) static inline struct bset *write_block(struct btree *b) { - return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c); + return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c->cache); } static void bch_btree_init_next(struct btree *b) @@ -175,7 +119,7 @@ if (b->written < btree_blocks(b)) bch_bset_init_next(&b->keys, write_block(b), - bset_magic(&b->c->sb)); + bset_magic(&b->c->cache->sb)); } @@ -207,8 +151,13 @@ struct bset *i = btree_bset_first(b); struct btree_iter *iter; + /* + * c->fill_iter can allocate an iterator with more memory space + * than static MAX_BSETS. + * See the comment arount cache_set->fill_iter. 
+ */ iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); - iter->size = b->c->sb.bucket_size / b->c->sb.block_size; + iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; iter->used = 0; #ifdef CONFIG_BCACHE_DEBUG @@ -226,12 +175,12 @@ goto err; err = "bad btree header"; - if (b->written + set_blocks(i, block_bytes(b->c)) > + if (b->written + set_blocks(i, block_bytes(b->c->cache)) > btree_blocks(b)) goto err; err = "bad magic"; - if (i->magic != bset_magic(&b->c->sb)) + if (i->magic != bset_magic(&b->c->cache->sb)) goto err; err = "bad checksum"; @@ -252,13 +201,13 @@ bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); - b->written += set_blocks(i, block_bytes(b->c)); + b->written += set_blocks(i, block_bytes(b->c->cache)); } err = "corrupted btree"; for (i = write_block(b); bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key); - i = ((void *) i) + block_bytes(b->c)) + i = ((void *) i) + block_bytes(b->c->cache)) if (i->seq == b->keys.set[0].data->seq) goto err; @@ -272,7 +221,7 @@ if (b->written < btree_blocks(b)) bch_bset_init_next(&b->keys, write_block(b), - bset_magic(&b->c->sb)); + bset_magic(&b->c->cache->sb)); out: mempool_free(iter, &b->c->fill_iter); return; @@ -361,7 +310,7 @@ btree_complete_write(b, w); if (btree_node_dirty(b)) - schedule_delayed_work(&b->work, 30 * HZ); + queue_delayed_work(btree_io_wq, &b->work, 30 * HZ); closure_return_with_destructor(cl, btree_node_write_unlock); } @@ -400,7 +349,7 @@ b->bio->bi_end_io = btree_node_write_endio; b->bio->bi_private = cl; - b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); + b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c->cache)); b->bio->bi_opf = REQ_OP_WRITE | REQ_META | REQ_FUA; bch_bio_map(b->bio, i); @@ -424,13 +373,14 @@ bset_sector_offset(&b->keys, i)); if (!bch_bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { - int j; struct bio_vec *bv; - void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); + void *addr = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); + struct bvec_iter_all iter_all; - bio_for_each_segment_all(bv, b->bio, j) - memcpy(page_address(bv->bv_page), - base + j * PAGE_SIZE, PAGE_SIZE); + bio_for_each_segment_all(bv, b->bio, iter_all) { + memcpy(page_address(bv->bv_page), addr, PAGE_SIZE); + addr += PAGE_SIZE; + } bch_submit_bbio(b->bio, b->c, &k.key, 0); @@ -475,10 +425,10 @@ do_btree_node_write(b); - atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size, + atomic_long_add(set_blocks(i, block_bytes(b->c->cache)) * b->c->cache->sb.block_size, &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); - b->written += set_blocks(i, block_bytes(b->c)); + b->written += set_blocks(i, block_bytes(b->c->cache)); } void bch_btree_node_write(struct btree *b, struct closure *parent) @@ -533,10 +483,15 @@ BUG_ON(!i->keys); if (!btree_node_dirty(b)) - schedule_delayed_work(&b->work, 30 * HZ); + queue_delayed_work(btree_io_wq, &b->work, 30 * HZ); set_btree_node_dirty(b); + /* + * w->journal is always the oldest journal pin of all bkeys + * in the leaf node, to make sure the oldest jset seq won't + * be increased before this btree node is flushed. + */ if (journal_ref) { if (w->journal && journal_pin_cmp(b->c, w->journal, journal_ref)) { @@ -561,7 +516,7 @@ * mca -> memory cache */ -#define mca_reserve(c) (((c->root && c->root->level) \ +#define mca_reserve(c) (((!IS_ERR_OR_NULL(c->root) && c->root->level) \ ? 
c->root->level : 1) * 8 + 16) #define mca_can_free(c) \ max_t(int, 0, c->btree_cache_used - mca_reserve(c)) @@ -607,6 +562,10 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, struct bkey *k, gfp_t gfp) { + /* + * kzalloc() is necessary here for initialization, + * see code comments in bch_btree_keys_init(). + */ struct btree *b = kzalloc(sizeof(struct btree), gfp); if (!b) @@ -662,7 +621,7 @@ * and BTREE_NODE_journal_flush bit cleared by btree_flush_write(). */ if (btree_node_journal_flush(b)) { - pr_debug("bnode %p is flushing by journal, retry", b); + pr_debug("bnode %p is flushing by journal, retry\n", b); mutex_unlock(&b->write_lock); udelay(1); goto retry; @@ -719,34 +678,32 @@ i = 0; btree_cache_used = c->btree_cache_used; - list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { + list_for_each_entry_safe_reverse(b, t, &c->btree_cache_freeable, list) { if (nr <= 0) goto out; - if (++i > 3 && - !mca_reap(b, 0, false)) { + if (!mca_reap(b, 0, false)) { mca_data_free(b); rw_unlock(true, b); freed++; } nr--; + i++; } - for (; (nr--) && i < btree_cache_used; i++) { - if (list_empty(&c->btree_cache)) + list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) { + if (nr <= 0 || i >= btree_cache_used) goto out; - b = list_first_entry(&c->btree_cache, struct btree, list); - list_rotate_left(&c->btree_cache); - - if (!b->accessed && - !mca_reap(b, 0, false)) { + if (!mca_reap(b, 0, false)) { mca_bucket_free(b); mca_data_free(b); rw_unlock(true, b); freed++; - } else - b->accessed = 0; + } + + nr--; + i++; } out: mutex_unlock(&c->bucket_lock); @@ -783,7 +740,7 @@ if (c->verify_data) list_move(&c->verify_data->list, &c->btree_cache); - free_pages((unsigned long) c->verify_ondisk, ilog2(bucket_pages(c))); + free_pages((unsigned long) c->verify_ondisk, ilog2(meta_bucket_pages(&c->cache->sb))); #endif list_splice(&c->btree_cache_freeable, @@ -830,7 +787,16 @@ mutex_init(&c->verify_lock); c->verify_ondisk = (void *) - __get_free_pages(GFP_KERNEL|__GFP_COMP, ilog2(bucket_pages(c))); + __get_free_pages(GFP_KERNEL|__GFP_COMP, + ilog2(meta_bucket_pages(&c->cache->sb))); + if (!c->verify_ondisk) { + /* + * Don't worry about the mca_rereserve buckets + * allocated in previous for-loop, they will be + * handled properly in bch_cache_set_unregister(). + */ + return -ENOMEM; + } c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); @@ -847,7 +813,7 @@ c->shrink.batch = c->btree_pages * 2; if (register_shrinker(&c->shrink)) - pr_warn("bcache: %s: could not register shrinker", + pr_warn("bcache: %s: could not register shrinker\n", __func__); return 0; @@ -919,7 +885,7 @@ * cannibalize_bucket() will take. This means every time we unlock the root of * the btree, we need to release this lock if we have it held. */ -static void bch_cannibalize_unlock(struct cache_set *c) +void bch_cannibalize_unlock(struct cache_set *c) { spin_lock(&c->btree_cannibalize_lock); if (c->btree_cache_alloc_lock == current) { @@ -1004,7 +970,7 @@ * bch_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. * - * If IO is necessary and running under generic_make_request, returns -EAGAIN. + * If IO is necessary and running under submit_bio_noacct, returns -EAGAIN. * * The btree node will have either a read or a write lock held, depending on * level and op->lock. 
@@ -1054,7 +1020,6 @@ BUG_ON(!b->written); b->parent = parent; - b->accessed = 1; for (; i <= b->keys.nsets && b->keys.set[i].size; i++) { prefetch(b->keys.set[i].tree); @@ -1100,7 +1065,7 @@ */ if (btree_node_journal_flush(b)) { mutex_unlock(&b->write_lock); - pr_debug("bnode %p journal_flush set, retry", b); + pr_debug("bnode %p journal_flush set, retry\n", b); udelay(1); goto retry; } @@ -1125,11 +1090,13 @@ struct btree *parent) { BKEY_PADDED(key) k; - struct btree *b = ERR_PTR(-EAGAIN); + struct btree *b; mutex_lock(&c->bucket_lock); retry: - if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) + /* return ERR_PTR(-EAGAIN) when it fails */ + b = ERR_PTR(-EAGAIN); + if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) goto err; bkey_put(c, &k.key); @@ -1145,9 +1112,8 @@ goto retry; } - b->accessed = 1; b->parent = parent; - bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); + bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->cache->sb)); mutex_unlock(&c->bucket_lock); @@ -1174,7 +1140,7 @@ { struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); bkey_copy_key(&n->key, &b->key); @@ -1206,19 +1172,18 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op) { struct cache_set *c = b->c; - struct cache *ca; - unsigned int i, reserve = (c->root->level - b->level) * 2 + 1; + struct cache *ca = c->cache; + unsigned int reserve = (c->root->level - b->level) * 2 + 1; mutex_lock(&c->bucket_lock); - for_each_cache(ca, c, i) - if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) { - if (op) - prepare_to_wait(&c->btree_cache_wait, &op->wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&c->bucket_lock); - return -EINTR; - } + if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) { + if (op) + prepare_to_wait(&c->btree_cache_wait, &op->wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&c->bucket_lock); + return -EINTR; + } mutex_unlock(&c->bucket_lock); @@ -1377,19 +1342,19 @@ memset(new_nodes, 0, sizeof(new_nodes)); closure_init_stack(&cl); - while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) + while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) keys += r[nodes++].keys; blocks = btree_default_blocks(b->c) * 2 / 3; if (nodes < 2 || __set_blocks(b->keys.set[0].data, keys, - block_bytes(b->c)) > blocks * (nodes - 1)) + block_bytes(b->c->cache)) > blocks * (nodes - 1)) return 0; for (i = 0; i < nodes; i++) { new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL); - if (IS_ERR_OR_NULL(new_nodes[i])) + if (IS_ERR(new_nodes[i])) goto out_nocoalesce; } @@ -1418,7 +1383,7 @@ k = bkey_next(k)) { if (__set_blocks(n1, n1->keys + keys + bkey_u64s(k), - block_bytes(b->c)) > blocks) + block_bytes(b->c->cache)) > blocks) break; last = k; @@ -1434,7 +1399,7 @@ * though) */ if (__set_blocks(n1, n1->keys + n2->keys, - block_bytes(b->c)) > + block_bytes(b->c->cache)) > btree_blocks(new_nodes[i])) goto out_unlock_nocoalesce; @@ -1443,7 +1408,7 @@ last = &r->b->key; } - BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) > + BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c->cache)) > btree_blocks(new_nodes[i])); if (last) @@ -1517,14 +1482,14 @@ out_nocoalesce: closure_sync(&cl); - bch_keylist_free(&keylist); while ((k = bch_keylist_pop(&keylist))) if (!bkey_cmp(k, &ZERO_KEY)) atomic_dec(&b->c->prio_blocked); + bch_keylist_free(&keylist); for (i = 0; i < nodes; i++) - if 
(!IS_ERR_OR_NULL(new_nodes[i])) { + if (!IS_ERR(new_nodes[i])) { btree_node_free(new_nodes[i]); rw_unlock(true, new_nodes[i]); } @@ -1706,7 +1671,7 @@ if (should_rewrite) { n = btree_node_alloc_replacement(b, NULL); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { bch_btree_node_write_sync(n); bch_btree_set_root(n); @@ -1734,7 +1699,6 @@ { struct cache *ca; struct bucket *b; - unsigned int i; if (!c->gc_mark_valid) return; @@ -1744,14 +1708,14 @@ c->gc_mark_valid = 0; c->gc_done = ZERO_KEY; - for_each_cache(ca, c, i) - for_each_bucket(b, ca) { - b->last_gc = b->gen; - if (!atomic_read(&b->pin)) { - SET_GC_MARK(b, 0); - SET_GC_SECTORS_USED(b, 0); - } + ca = c->cache; + for_each_bucket(b, ca) { + b->last_gc = b->gen; + if (!atomic_read(&b->pin)) { + SET_GC_MARK(b, 0); + SET_GC_SECTORS_USED(b, 0); } + } mutex_unlock(&c->bucket_lock); } @@ -1760,7 +1724,8 @@ { struct bucket *b; struct cache *ca; - unsigned int i; + unsigned int i, j; + uint64_t *k; mutex_lock(&c->bucket_lock); @@ -1778,7 +1743,6 @@ struct bcache_device *d = c->devices[i]; struct cached_dev *dc; struct keybuf_key *w, *n; - unsigned int j; if (!d || UUID_FLASH_ONLY(&c->uuids[i])) continue; @@ -1795,29 +1759,27 @@ rcu_read_unlock(); c->avail_nbuckets = 0; - for_each_cache(ca, c, i) { - uint64_t *i; - ca->invalidate_needs_gc = 0; + ca = c->cache; + ca->invalidate_needs_gc = 0; - for (i = ca->sb.d; i < ca->sb.d + ca->sb.keys; i++) - SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); + for (k = ca->sb.d; k < ca->sb.d + ca->sb.keys; k++) + SET_GC_MARK(ca->buckets + *k, GC_MARK_METADATA); - for (i = ca->prio_buckets; - i < ca->prio_buckets + prio_buckets(ca) * 2; i++) - SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); + for (k = ca->prio_buckets; + k < ca->prio_buckets + prio_buckets(ca) * 2; k++) + SET_GC_MARK(ca->buckets + *k, GC_MARK_METADATA); - for_each_bucket(b, ca) { - c->need_gc = max(c->need_gc, bucket_gc_gen(b)); + for_each_bucket(b, ca) { + c->need_gc = max(c->need_gc, bucket_gc_gen(b)); - if (atomic_read(&b->pin)) - continue; + if (atomic_read(&b->pin)) + continue; - BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b)); + BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b)); - if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) - c->avail_nbuckets++; - } + if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) + c->avail_nbuckets++; } mutex_unlock(&c->bucket_lock); @@ -1841,7 +1803,7 @@ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */ do { - ret = btree_root(gc_root, c, &op, &writes, &stats); + ret = bcache_btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); cond_resched(); @@ -1849,7 +1811,7 @@ schedule_timeout_interruptible(msecs_to_jiffies (GC_SLEEP_MS)); else if (ret) - pr_warn("gc failed!"); + pr_warn("gc failed!\n"); } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); bch_btree_gc_finish(c); @@ -1869,12 +1831,10 @@ static bool gc_should_run(struct cache_set *c) { - struct cache *ca; - unsigned int i; + struct cache *ca = c->cache; - for_each_cache(ca, c, i) - if (ca->invalidate_needs_gc) - return true; + if (ca->invalidate_needs_gc) + return true; if (atomic_read(&c->sectors_to_gc) < 0) return true; @@ -1939,7 +1899,7 @@ } if (p) - ret = btree(check_recurse, p, b, op); + ret = bcache_btree(check_recurse, p, b, op); p = k; } while (p && !ret); @@ -1948,20 +1908,186 @@ return ret; } + +static int bch_btree_check_thread(void *arg) +{ + int ret; + struct btree_check_info *info = arg; + struct btree_check_state *check_state = info->state; + struct cache_set *c = check_state->c; + struct btree_iter iter; + 
struct bkey *k, *p; + int cur_idx, prev_idx, skip_nr; + + k = p = NULL; + cur_idx = prev_idx = 0; + ret = 0; + + /* root node keys are checked before thread created */ + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + BUG_ON(!k); + + p = k; + while (k) { + /* + * Fetch a root node key index, skip the keys which + * should be fetched by other threads, then check the + * sub-tree indexed by the fetched key. + */ + spin_lock(&check_state->idx_lock); + cur_idx = check_state->key_idx; + check_state->key_idx++; + spin_unlock(&check_state->idx_lock); + + skip_nr = cur_idx - prev_idx; + + while (skip_nr) { + k = bch_btree_iter_next_filter(&iter, + &c->root->keys, + bch_ptr_bad); + if (k) + p = k; + else { + /* + * No more keys to check in root node, + * current checking threads are enough, + * stop creating more. + */ + atomic_set(&check_state->enough, 1); + /* Update check_state->enough earlier */ + smp_mb__after_atomic(); + goto out; + } + skip_nr--; + cond_resched(); + } + + if (p) { + struct btree_op op; + + btree_node_prefetch(c->root, p); + c->gc_stats.nodes++; + bch_btree_op_init(&op, 0); + ret = bcache_btree(check_recurse, p, c->root, &op); + /* + * The op may be added to cache_set's btree_cache_wait + * in mca_cannibalize(), must ensure it is removed from + * the list and release btree_cache_alloc_lock before + * free op memory. + * Otherwise, the btree_cache_wait will be damaged. + */ + bch_cannibalize_unlock(c); + finish_wait(&c->btree_cache_wait, &(&op)->wait); + if (ret) + goto out; + } + p = NULL; + prev_idx = cur_idx; + cond_resched(); + } + +out: + info->result = ret; + /* update check_state->started among all CPUs */ + smp_mb__before_atomic(); + if (atomic_dec_and_test(&check_state->started)) + wake_up(&check_state->wait); + + return ret; +} + + + +static int bch_btree_chkthread_nr(void) +{ + int n = num_online_cpus()/2; + + if (n == 0) + n = 1; + else if (n > BCH_BTR_CHKTHREAD_MAX) + n = BCH_BTR_CHKTHREAD_MAX; + + return n; +} + int bch_btree_check(struct cache_set *c) { - struct btree_op op; + int ret = 0; + int i; + struct bkey *k = NULL; + struct btree_iter iter; + struct btree_check_state check_state; - bch_btree_op_init(&op, SHRT_MAX); + /* check and mark root node keys */ + for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) + bch_initial_mark_key(c, c->root->level, k); - return btree_root(check_recurse, c, &op); + bch_initial_mark_key(c, c->root->level + 1, &c->root->key); + + if (c->root->level == 0) + return 0; + + memset(&check_state, 0, sizeof(struct btree_check_state)); + check_state.c = c; + check_state.total_threads = bch_btree_chkthread_nr(); + check_state.key_idx = 0; + spin_lock_init(&check_state.idx_lock); + atomic_set(&check_state.started, 0); + atomic_set(&check_state.enough, 0); + init_waitqueue_head(&check_state.wait); + + rw_lock(0, c->root, c->root->level); + /* + * Run multiple threads to check btree nodes in parallel, + * if check_state.enough is non-zero, it means current + * running check threads are enough, unncessary to create + * more. 
+ */ + for (i = 0; i < check_state.total_threads; i++) { + /* fetch latest check_state.enough earlier */ + smp_mb__before_atomic(); + if (atomic_read(&check_state.enough)) + break; + + check_state.infos[i].result = 0; + check_state.infos[i].state = &check_state; + + check_state.infos[i].thread = + kthread_run(bch_btree_check_thread, + &check_state.infos[i], + "bch_btrchk[%d]", i); + if (IS_ERR(check_state.infos[i].thread)) { + pr_err("fails to run thread bch_btrchk[%d]\n", i); + for (--i; i >= 0; i--) + kthread_stop(check_state.infos[i].thread); + ret = -ENOMEM; + goto out; + } + atomic_inc(&check_state.started); + } + + /* + * Must wait for all threads to stop. + */ + wait_event(check_state.wait, atomic_read(&check_state.started) == 0); + + for (i = 0; i < check_state.total_threads; i++) { + if (check_state.infos[i].result) { + ret = check_state.infos[i].result; + goto out; + } + } + +out: + rw_unlock(0, c->root); + return ret; } void bch_initial_gc_finish(struct cache_set *c) { - struct cache *ca; + struct cache *ca = c->cache; struct bucket *b; - unsigned int i; bch_btree_gc_finish(c); @@ -1976,20 +2102,18 @@ * This is only safe for buckets that have no live data in them, which * there should always be some of. */ - for_each_cache(ca, c, i) { - for_each_bucket(b, ca) { - if (fifo_full(&ca->free[RESERVE_PRIO]) && - fifo_full(&ca->free[RESERVE_BTREE])) - break; + for_each_bucket(b, ca) { + if (fifo_full(&ca->free[RESERVE_PRIO]) && + fifo_full(&ca->free[RESERVE_BTREE])) + break; - if (bch_can_invalidate_bucket(ca, b) && - !GC_MARK(b)) { - __bch_invalidate_one_bucket(ca, b); - if (!fifo_push(&ca->free[RESERVE_PRIO], - b - ca->buckets)) - fifo_push(&ca->free[RESERVE_BTREE], - b - ca->buckets); - } + if (bch_can_invalidate_bucket(ca, b) && + !GC_MARK(b)) { + __bch_invalidate_one_bucket(ca, b); + if (!fifo_push(&ca->free[RESERVE_PRIO], + b - ca->buckets)) + fifo_push(&ca->free[RESERVE_BTREE], + b - ca->buckets); } } @@ -2097,7 +2221,7 @@ goto err; split = set_blocks(btree_bset_first(n1), - block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5; + block_bytes(n1->c->cache)) > (btree_blocks(b) * 4) / 5; if (split) { unsigned int keys = 0; @@ -2344,7 +2468,7 @@ if (ret) { struct bkey *k; - pr_err("error %i", ret); + pr_err("error %i\n", ret); while ((k = bch_keylist_pop(keys))) bkey_put(c, k); @@ -2394,7 +2518,7 @@ while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { - ret = btree(map_nodes_recurse, k, b, + ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); from = NULL; @@ -2412,10 +2536,10 @@ int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_nodes_fn *fn, int flags) { - return btree_root(map_nodes_recurse, c, op, from, fn, flags); + return bcache_btree_root(map_nodes_recurse, c, op, from, fn, flags); } -static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, +int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, struct bkey *from, btree_map_keys_fn *fn, int flags) { @@ -2428,7 +2552,8 @@ while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = !b->level ? 
fn(op, b, k) - : btree(map_keys_recurse, k, b, op, from, fn, flags); + : bcache_btree(map_keys_recurse, k, + b, op, from, fn, flags); from = NULL; if (ret != MAP_CONTINUE) @@ -2445,7 +2570,7 @@ int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_keys_fn *fn, int flags) { - return btree_root(map_keys_recurse, c, op, from, fn, flags); + return bcache_btree_root(map_keys_recurse, c, op, from, fn, flags); } /* Keybuf code */ @@ -2631,7 +2756,7 @@ break; if (bkey_cmp(&buf->last_scanned, end) >= 0) { - pr_debug("scan finished"); + pr_debug("scan finished\n"); break; } @@ -2649,3 +2774,18 @@ spin_lock_init(&buf->lock); array_allocator_init(&buf->freelist); } + +void bch_btree_exit(void) +{ + if (btree_io_wq) + destroy_workqueue(btree_io_wq); +} + +int __init bch_btree_init(void) +{ + btree_io_wq = alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0); + if (!btree_io_wq) + return -ENOMEM; + + return 0; +} -- Gitblit v1.6.2
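The hunks above move the 30-second deferred btree writes off the system workqueue (schedule_delayed_work) onto a dedicated btree_io_wq created with WQ_MEM_RECLAIM, and add bch_btree_init()/bch_btree_exit() to manage its lifecycle. A minimal sketch of that same lifecycle pattern follows; the module, queue and handler names are invented for illustration and are not part of the patch.

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *demo_io_wq;
static struct delayed_work demo_work;

static void demo_work_fn(struct work_struct *work)
{
	pr_info("deferred btree-style I/O would run here\n");
}

static int __init demo_init(void)
{
	/*
	 * WQ_MEM_RECLAIM gives the queue a rescuer thread, so work that
	 * sits on the memory-reclaim/writeback path cannot deadlock
	 * waiting for a new worker to be allocated.
	 */
	demo_io_wq = alloc_workqueue("demo_btree_io", WQ_MEM_RECLAIM, 0);
	if (!demo_io_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&demo_work, demo_work_fn);
	queue_delayed_work(demo_io_wq, &demo_work, 30 * HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo_work);
	destroy_workqueue(demo_io_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");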
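bch_btree_check() is rewritten to fan the initial btree walk out over up to num_online_cpus()/2 kthreads (capped at BCH_BTR_CHKTHREAD_MAX): each bch_btree_check_thread() claims the next root-node key index under check_state->idx_lock, checks the sub-tree below that key, and the parent blocks on a waitqueue until the started counter drops to zero. The userspace sketch below mirrors only that work-splitting scheme with pthreads; the constants and helper names are illustrative, and pthread_join stands in for the started counter plus wait_event pair.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_THREADS 12	/* stands in for BCH_BTR_CHKTHREAD_MAX */
#define NKEYS       40	/* stands in for the root node's key count */

static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
static int key_idx;	/* next unclaimed root-node key index */

static void *check_worker(void *arg)
{
	int id = (int)(long)arg;

	for (;;) {
		int cur;

		/* claim the next sub-tree; exactly one worker gets each index */
		pthread_mutex_lock(&idx_lock);
		cur = key_idx++;
		pthread_mutex_unlock(&idx_lock);

		if (cur >= NKEYS)	/* no more keys: this worker is done */
			return NULL;

		/* the real thread recurses into the sub-tree at this key */
		printf("worker %d checks sub-tree at key index %d\n", id, cur);
	}
}

int main(void)
{
	pthread_t tids[MAX_THREADS];
	long n = sysconf(_SC_NPROCESSORS_ONLN) / 2;
	int i;

	if (n < 1)
		n = 1;
	if (n > MAX_THREADS)
		n = MAX_THREADS;

	for (i = 0; i < n; i++)
		pthread_create(&tids[i], NULL, check_worker, (void *)(long)i);
	for (i = 0; i < n; i++)
		pthread_join(tids[i], NULL);
	return 0;
}

Splitting the work on root-node keys keeps the locking simple: the parent only takes a read lock on the root (rw_lock(0, c->root, ...)), and each worker acquires whatever locks its own recursion needs below its key.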
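The do_btree_node_write() hunk also adapts to the newer bio_for_each_segment_all() API, which takes a struct bvec_iter_all cursor instead of an integer segment index, so the copy loop advances its own addr pointer rather than computing base + j * PAGE_SIZE. A sketch of the new loop shape, assuming one full page per bio segment as in the patch; copy_bset_pages() is a hypothetical helper.

#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/string.h>

/* Copy a page-aligned source buffer into the pages backing a bio. */
static void copy_bset_pages(struct bio *bio, void *src)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;	/* replaces the old int index */
	void *addr = src;

	bio_for_each_segment_all(bv, bio, iter_all) {
		memcpy(page_address(bv->bv_page), addr, PAGE_SIZE);
		addr += PAGE_SIZE;
	}
}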
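Several callers switch from IS_ERR_OR_NULL() to IS_ERR() because the allocation paths now always return an ERR_PTR-encoded error (note the explicit b = ERR_PTR(-EAGAIN) added in __bch_btree_node_alloc) and never NULL. The convention, shown with hypothetical names in a minimal sketch:

#include <linux/err.h>
#include <linux/slab.h>

struct demo_node {
	int level;
};

/* On failure return an encoded errno, never NULL. */
static struct demo_node *demo_node_alloc(gfp_t gfp)
{
	struct demo_node *n = kzalloc(sizeof(*n), gfp);

	if (!n)
		return ERR_PTR(-ENOMEM);
	return n;
}

static int demo_use(void)
{
	struct demo_node *n = demo_node_alloc(GFP_KERNEL);

	if (IS_ERR(n))		/* IS_ERR_OR_NULL() is no longer needed */
		return PTR_ERR(n);

	kfree(n);
	return 0;
}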