hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/block/drbd/drbd_main.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 drbd.c
34
....@@ -10,19 +11,6 @@
1011 Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
1112 from Logicworks, Inc. for making SDP replication support possible.
1213
13
- drbd is free software; you can redistribute it and/or modify
14
- it under the terms of the GNU General Public License as published by
15
- the Free Software Foundation; either version 2, or (at your option)
16
- any later version.
17
-
18
- drbd is distributed in the hope that it will be useful,
19
- but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
- GNU General Public License for more details.
22
-
23
- You should have received a copy of the GNU General Public License
24
- along with drbd; see the file COPYING. If not, write to
25
- the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
2614
2715 */
2816
....@@ -144,9 +132,10 @@
144132 DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
145133
146134 static const struct block_device_operations drbd_ops = {
147
- .owner = THIS_MODULE,
148
- .open = drbd_open,
149
- .release = drbd_release,
135
+ .owner = THIS_MODULE,
136
+ .submit_bio = drbd_submit_bio,
137
+ .open = drbd_open,
138
+ .release = drbd_release,
150139 };
151140
152141 struct bio *bio_alloc_drbd(gfp_t gfp_mask)
....@@ -195,7 +184,7 @@
195184 unsigned int set_size)
196185 {
197186 struct drbd_request *r;
198
- struct drbd_request *req = NULL;
187
+ struct drbd_request *req = NULL, *tmp = NULL;
199188 int expect_epoch = 0;
200189 int expect_size = 0;
201190
....@@ -249,8 +238,11 @@
249238 * to catch requests being barrier-acked "unexpectedly".
250239 * It usually should find the same req again, or some READ preceding it. */
251240 list_for_each_entry(req, &connection->transfer_log, tl_requests)
252
- if (req->epoch == expect_epoch)
241
+ if (req->epoch == expect_epoch) {
242
+ tmp = req;
253243 break;
244
+ }
245
+ req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests);
254246 list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) {
255247 if (req->epoch != expect_epoch)
256248 break;
....@@ -441,7 +433,7 @@
441433 thi->t_state = RESTARTING;
442434 drbd_info(resource, "Restarting %s thread (from %s [%d])\n",
443435 thi->name, current->comm, current->pid);
444
- /* fall through */
436
+ fallthrough;
445437 case RUNNING:
446438 case RESTARTING:
447439 default:
....@@ -479,7 +471,7 @@
479471 smp_mb();
480472 init_completion(&thi->stop);
481473 if (thi->task != current)
482
- force_sig(DRBD_SIGKILL, thi->task);
474
+ send_sig(DRBD_SIGKILL, thi->task, 1);
483475 }
484476
485477 spin_unlock_irqrestore(&thi->t_lock, flags);
....@@ -672,7 +664,7 @@
672664 /* DRBD protocol "pings" are latency critical.
673665 * This is supposed to trigger tcp_push_pending_frames() */
674666 if (!err && (cmd == P_PING || cmd == P_PING_ACK))
675
- drbd_tcp_nodelay(sock->socket);
667
+ tcp_sock_set_nodelay(sock->socket->sk);
676668
677669 return err;
678670 }
....@@ -995,7 +987,10 @@
995987
996988 p->d_size = cpu_to_be64(d_size);
997989 p->u_size = cpu_to_be64(u_size);
998
- p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(device->this_bdev));
990
+ if (trigger_reply)
991
+ p->c_size = 0;
992
+ else
993
+ p->c_size = cpu_to_be64(get_capacity(device->vdisk));
999994 p->max_bio_size = cpu_to_be32(max_bio_size);
1000995 p->queue_order_type = cpu_to_be16(q_order_type);
1001996 p->dds_flags = cpu_to_be16(flags);
....@@ -1378,7 +1373,7 @@
13781373 struct p_data *dp, int data_size)
13791374 {
13801375 if (peer_device->connection->peer_integrity_tfm)
1381
- data_size -= crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
1376
+ data_size -= crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
13821377 _drbd_send_ack(peer_device, cmd, dp->sector, cpu_to_be32(data_size),
13831378 dp->block_id);
13841379 }
....@@ -1564,7 +1559,7 @@
15641559 * put_page(); and would cause either a VM_BUG directly, or
15651560 * __page_cache_release a page that would actually still be referenced
15661561 * by someone, leading to some obscure delayed Oops somewhere else. */
1567
- if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page))
1562
+ if (drbd_disable_sendpage || !sendpage_ok(page))
15681563 return _drbd_no_send_page(peer_device, page, offset, size, msg_flags);
15691564
15701565 msg_flags |= MSG_NOSIGNAL;
....@@ -1669,12 +1664,16 @@
16691664 (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
16701665 (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
16711666 (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
1672
- (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0);
1667
+ (bio_op(bio) == REQ_OP_WRITE_ZEROES ?
1668
+ ((connection->agreed_features & DRBD_FF_WZEROES) ?
1669
+ (DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0))
1670
+ : DP_DISCARD)
1671
+ : 0);
16731672 else
16741673 return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
16751674 }
16761675
1677
-/* Used to send write or TRIM aka REQ_DISCARD requests
1676
+/* Used to send write or TRIM aka REQ_OP_DISCARD requests
16781677 * R_PRIMARY -> Peer (P_DATA, P_TRIM)
16791678 */
16801679 int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
....@@ -1691,7 +1690,7 @@
16911690 sock = &peer_device->connection->data;
16921691 p = drbd_prepare_command(peer_device, sock);
16931692 digest_size = peer_device->connection->integrity_tfm ?
1694
- crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0;
1693
+ crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0;
16951694
16961695 if (!p)
16971696 return -EIO;
....@@ -1713,10 +1712,11 @@
17131712 }
17141713 p->dp_flags = cpu_to_be32(dp_flags);
17151714
1716
- if (dp_flags & DP_DISCARD) {
1715
+ if (dp_flags & (DP_DISCARD|DP_ZEROES)) {
1716
+ enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM;
17171717 struct p_trim *t = (struct p_trim*)p;
17181718 t->size = cpu_to_be32(req->i.size);
1719
- err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
1719
+ err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0);
17201720 goto out;
17211721 }
17221722 if (dp_flags & DP_WSAME) {
....@@ -1797,7 +1797,7 @@
17971797 p = drbd_prepare_command(peer_device, sock);
17981798
17991799 digest_size = peer_device->connection->integrity_tfm ?
1800
- crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0;
1800
+ crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0;
18011801
18021802 if (!p)
18031803 return -EIO;
....@@ -1857,7 +1857,7 @@
18571857
18581858 /* THINK if (signal_pending) return ... ? */
18591859
1860
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
1860
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
18611861
18621862 if (sock == connection->data.socket) {
18631863 rcu_read_lock();
....@@ -2035,6 +2035,17 @@
20352035 device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
20362036 }
20372037
2038
+void drbd_set_my_capacity(struct drbd_device *device, sector_t size)
2039
+{
2040
+ char ppb[10];
2041
+
2042
+ set_capacity(device->vdisk, size);
2043
+ revalidate_disk_size(device->vdisk, false);
2044
+
2045
+ drbd_info(device, "size = %s (%llu KB)\n",
2046
+ ppsize(ppb, size>>1), (unsigned long long)size>>1);
2047
+}
2048
+
20382049 void drbd_device_cleanup(struct drbd_device *device)
20392050 {
20402051 int i;
....@@ -2060,7 +2071,8 @@
20602071 }
20612072 D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL);
20622073
2063
- drbd_set_my_capacity(device, 0);
2074
+ set_capacity(device->vdisk, 0);
2075
+ revalidate_disk_size(device->vdisk, false);
20642076 if (device->bitmap) {
20652077 /* maybe never allocated. */
20662078 drbd_bm_resize(device, 0, 1);
....@@ -2227,9 +2239,6 @@
22272239 /* cleanup stuff that may have been allocated during
22282240 * device (re-)configuration or state changes */
22292241
2230
- if (device->this_bdev)
2231
- bdput(device->this_bdev);
2232
-
22332242 drbd_backing_dev_free(device, device->ldev);
22342243 device->ldev = NULL;
22352244
....@@ -2316,7 +2325,7 @@
23162325 * workqueues instead.
23172326 */
23182327
2319
- /* We are not just doing generic_make_request(),
2328
+ /* We are not just doing submit_bio_noacct(),
23202329 * as we want to keep the start_time information. */
23212330 inc_ap_bio(device);
23222331 __drbd_make_request(device, bio, start_jif);
....@@ -2404,62 +2413,6 @@
24042413 idr_destroy(&drbd_devices);
24052414
24062415 pr_info("module cleanup done.\n");
2407
-}
2408
-
2409
-/**
2410
- * drbd_congested() - Callback for the flusher thread
2411
- * @congested_data: User data
2412
- * @bdi_bits: Bits the BDI flusher thread is currently interested in
2413
- *
2414
- * Returns 1<<WB_async_congested and/or 1<<WB_sync_congested if we are congested.
2415
- */
2416
-static int drbd_congested(void *congested_data, int bdi_bits)
2417
-{
2418
- struct drbd_device *device = congested_data;
2419
- struct request_queue *q;
2420
- char reason = '-';
2421
- int r = 0;
2422
-
2423
- if (!may_inc_ap_bio(device)) {
2424
- /* DRBD has frozen IO */
2425
- r = bdi_bits;
2426
- reason = 'd';
2427
- goto out;
2428
- }
2429
-
2430
- if (test_bit(CALLBACK_PENDING, &first_peer_device(device)->connection->flags)) {
2431
- r |= (1 << WB_async_congested);
2432
- /* Without good local data, we would need to read from remote,
2433
- * and that would need the worker thread as well, which is
2434
- * currently blocked waiting for that usermode helper to
2435
- * finish.
2436
- */
2437
- if (!get_ldev_if_state(device, D_UP_TO_DATE))
2438
- r |= (1 << WB_sync_congested);
2439
- else
2440
- put_ldev(device);
2441
- r &= bdi_bits;
2442
- reason = 'c';
2443
- goto out;
2444
- }
2445
-
2446
- if (get_ldev(device)) {
2447
- q = bdev_get_queue(device->ldev->backing_bdev);
2448
- r = bdi_congested(q->backing_dev_info, bdi_bits);
2449
- put_ldev(device);
2450
- if (r)
2451
- reason = 'b';
2452
- }
2453
-
2454
- if (bdi_bits & (1 << WB_async_congested) &&
2455
- test_bit(NET_CONGESTED, &first_peer_device(device)->connection->flags)) {
2456
- r |= (1 << WB_async_congested);
2457
- reason = reason == 'b' ? 'a' : 'n';
2458
- }
2459
-
2460
-out:
2461
- device->congestion_reason = reason;
2462
- return r;
24632416 }
24642417
24652418 static void drbd_init_workqueue(struct drbd_work_queue* wq)
....@@ -2558,11 +2511,11 @@
25582511 {
25592512 drbd_free_sock(connection);
25602513
2561
- crypto_free_ahash(connection->csums_tfm);
2562
- crypto_free_ahash(connection->verify_tfm);
2514
+ crypto_free_shash(connection->csums_tfm);
2515
+ crypto_free_shash(connection->verify_tfm);
25632516 crypto_free_shash(connection->cram_hmac_tfm);
2564
- crypto_free_ahash(connection->integrity_tfm);
2565
- crypto_free_ahash(connection->peer_integrity_tfm);
2517
+ crypto_free_shash(connection->integrity_tfm);
2518
+ crypto_free_shash(connection->peer_integrity_tfm);
25662519 kfree(connection->int_dig_in);
25672520 kfree(connection->int_dig_vv);
25682521
....@@ -2767,7 +2720,7 @@
27672720 enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
27682721 {
27692722 struct drbd_resource *resource = adm_ctx->resource;
2770
- struct drbd_connection *connection;
2723
+ struct drbd_connection *connection, *n;
27712724 struct drbd_device *device;
27722725 struct drbd_peer_device *peer_device, *tmp_peer_device;
27732726 struct gendisk *disk;
....@@ -2793,11 +2746,10 @@
27932746
27942747 drbd_init_set_defaults(device);
27952748
2796
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
2749
+ q = blk_alloc_queue(NUMA_NO_NODE);
27972750 if (!q)
27982751 goto out_no_q;
27992752 device->rq_queue = q;
2800
- q->queuedata = device;
28012753
28022754 disk = alloc_disk(1);
28032755 if (!disk)
....@@ -2813,14 +2765,6 @@
28132765 sprintf(disk->disk_name, "drbd%d", minor);
28142766 disk->private_data = device;
28152767
2816
- device->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
2817
- /* we have no partitions. we contain only ourselves. */
2818
- device->this_bdev->bd_contains = device->this_bdev;
2819
-
2820
- q->backing_dev_info->congested_fn = drbd_congested;
2821
- q->backing_dev_info->congested_data = device;
2822
-
2823
- blk_queue_make_request(q, drbd_make_request);
28242768 blk_queue_write_cache(q, true, true);
28252769 /* Setting the max_hw_sectors to an odd value of 8kibyte here
28262770 This triggers a max_bio_size message upon first attach or connect */
....@@ -2875,7 +2819,7 @@
28752819
28762820 if (init_submitter(device)) {
28772821 err = ERR_NOMEM;
2878
- goto out_idr_remove_vol;
2822
+ goto out_idr_remove_from_resource;
28792823 }
28802824
28812825 add_disk(disk);
....@@ -2892,10 +2836,8 @@
28922836 drbd_debugfs_device_add(device);
28932837 return NO_ERROR;
28942838
2895
-out_idr_remove_vol:
2896
- idr_remove(&connection->peer_devices, vnr);
28972839 out_idr_remove_from_resource:
2898
- for_each_connection(connection, resource) {
2840
+ for_each_connection_safe(connection, n, resource) {
28992841 peer_device = idr_remove(&connection->peer_devices, vnr);
29002842 if (peer_device)
29012843 kref_put(&connection->kref, drbd_destroy_connection);
....@@ -3002,8 +2944,7 @@
30022944 spin_lock_init(&retry.lock);
30032945 INIT_LIST_HEAD(&retry.writes);
30042946
3005
- if (drbd_debugfs_init())
3006
- pr_notice("failed to initialize debugfs -- will not be available\n");
2947
+ drbd_debugfs_init();
30072948
30082949 pr_info("initialized. "
30092950 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
....@@ -3097,7 +3038,7 @@
30973038
30983039 memset(buffer, 0, sizeof(*buffer));
30993040
3100
- buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(device->this_bdev));
3041
+ buffer->la_size_sect = cpu_to_be64(get_capacity(device->vdisk));
31013042 for (i = UI_CURRENT; i < UI_SIZE; i++)
31023043 buffer->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]);
31033044 buffer->flags = cpu_to_be32(device->ldev->md.flags);
....@@ -3155,7 +3096,7 @@
31553096
31563097 /* Update device->ldev->md.la_size_sect,
31573098 * since we updated it on metadata. */
3158
- device->ldev->md.la_size_sect = drbd_get_capacity(device->this_bdev);
3099
+ device->ldev->md.la_size_sect = get_capacity(device->vdisk);
31593100
31603101 drbd_md_put_buffer(device);
31613102 out:
....@@ -3407,22 +3348,11 @@
34073348 * the meta-data super block. This function sets MD_DIRTY, and starts a
34083349 * timer that ensures that within five seconds you have to call drbd_md_sync().
34093350 */
3410
-#ifdef DEBUG
3411
-void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func)
3412
-{
3413
- if (!test_and_set_bit(MD_DIRTY, &device->flags)) {
3414
- mod_timer(&device->md_sync_timer, jiffies + HZ);
3415
- device->last_md_mark_dirty.line = line;
3416
- device->last_md_mark_dirty.func = func;
3417
- }
3418
-}
3419
-#else
34203351 void drbd_md_mark_dirty(struct drbd_device *device)
34213352 {
34223353 if (!test_and_set_bit(MD_DIRTY, &device->flags))
34233354 mod_timer(&device->md_sync_timer, jiffies + 5*HZ);
34243355 }
3425
-#endif
34263356
34273357 void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local)
34283358 {
....@@ -3699,9 +3629,8 @@
36993629 * when we want to support more than
37003630 * one PRO_VERSION */
37013631 static const char *cmdnames[] = {
3632
+
37023633 [P_DATA] = "Data",
3703
- [P_WSAME] = "WriteSame",
3704
- [P_TRIM] = "Trim",
37053634 [P_DATA_REPLY] = "DataReply",
37063635 [P_RS_DATA_REPLY] = "RSDataReply",
37073636 [P_BARRIER] = "Barrier",
....@@ -3712,7 +3641,6 @@
37123641 [P_DATA_REQUEST] = "DataRequest",
37133642 [P_RS_DATA_REQUEST] = "RSDataRequest",
37143643 [P_SYNC_PARAM] = "SyncParam",
3715
- [P_SYNC_PARAM89] = "SyncParam89",
37163644 [P_PROTOCOL] = "ReportProtocol",
37173645 [P_UUIDS] = "ReportUUIDs",
37183646 [P_SIZES] = "ReportSizes",
....@@ -3720,6 +3648,7 @@
37203648 [P_SYNC_UUID] = "ReportSyncUUID",
37213649 [P_AUTH_CHALLENGE] = "AuthChallenge",
37223650 [P_AUTH_RESPONSE] = "AuthResponse",
3651
+ [P_STATE_CHG_REQ] = "StateChgRequest",
37233652 [P_PING] = "Ping",
37243653 [P_PING_ACK] = "PingAck",
37253654 [P_RECV_ACK] = "RecvAck",
....@@ -3730,24 +3659,26 @@
37303659 [P_NEG_DREPLY] = "NegDReply",
37313660 [P_NEG_RS_DREPLY] = "NegRSDReply",
37323661 [P_BARRIER_ACK] = "BarrierAck",
3733
- [P_STATE_CHG_REQ] = "StateChgRequest",
37343662 [P_STATE_CHG_REPLY] = "StateChgReply",
37353663 [P_OV_REQUEST] = "OVRequest",
37363664 [P_OV_REPLY] = "OVReply",
37373665 [P_OV_RESULT] = "OVResult",
37383666 [P_CSUM_RS_REQUEST] = "CsumRSRequest",
37393667 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
3668
+ [P_SYNC_PARAM89] = "SyncParam89",
37403669 [P_COMPRESSED_BITMAP] = "CBitmap",
37413670 [P_DELAY_PROBE] = "DelayProbe",
37423671 [P_OUT_OF_SYNC] = "OutOfSync",
3743
- [P_RETRY_WRITE] = "RetryWrite",
37443672 [P_RS_CANCEL] = "RSCancel",
37453673 [P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
37463674 [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
37473675 [P_RETRY_WRITE] = "retry_write",
37483676 [P_PROTOCOL_UPDATE] = "protocol_update",
3677
+ [P_TRIM] = "Trim",
37493678 [P_RS_THIN_REQ] = "rs_thin_req",
37503679 [P_RS_DEALLOCATED] = "rs_deallocated",
3680
+ [P_WSAME] = "WriteSame",
3681
+ [P_ZEROES] = "Zeroes",
37513682
37523683 /* enum drbd_packet, but not commands - obsoleted flags:
37533684 * P_MAY_IGNORE