2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/block/blk-settings.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Functions related to setting various queue properties from drivers
  */
@@ -6,11 +7,12 @@
 #include <linux/init.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
+#include <linux/memblock.h>	/* for max_pfn/max_low_pfn */
 #include <linux/gcd.h>
 #include <linux/lcm.h>
 #include <linux/jiffies.h>
 #include <linux/gfp.h>
+#include <linux/dma-mapping.h>
 
 #include "blk.h"
 #include "blk-wbt.h"
@@ -20,64 +22,11 @@
 
 unsigned long blk_max_pfn;
 
-/**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
- * @pfn: prepare_request function
- *
- * It's possible for a queue to register a prepare_request callback which
- * is invoked before the request is handed to the request_fn. The goal of
- * the function is to prepare a request for I/O, it can be used to build a
- * cdb from the request data for instance.
- *
- */
-void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
-{
-	q->prep_rq_fn = pfn;
-}
-EXPORT_SYMBOL(blk_queue_prep_rq);
-
-/**
- * blk_queue_unprep_rq - set an unprepare_request function for queue
- * @q: queue
- * @ufn: unprepare_request function
- *
- * It's possible for a queue to register an unprepare_request callback
- * which is invoked before the request is finally completed. The goal
- * of the function is to deallocate any data that was allocated in the
- * prepare_request callback.
- *
- */
-void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn)
-{
-	q->unprep_rq_fn = ufn;
-}
-EXPORT_SYMBOL(blk_queue_unprep_rq);
-
-void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
-{
-	q->softirq_done_fn = fn;
-}
-EXPORT_SYMBOL(blk_queue_softirq_done);
-
 void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
 {
 	q->rq_timeout = timeout;
 }
 EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
-
-void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
-{
-	WARN_ON_ONCE(q->mq_ops);
-	q->rq_timed_out_fn = fn;
-}
-EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
-
-void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
-{
-	q->lld_busy_fn = fn;
-}
-EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
 
 /**
  * blk_set_default_limits - reset limits to default values
@@ -99,6 +48,7 @@
 	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_write_zeroes_sectors = 0;
+	lim->max_zone_append_sectors = 0;
 	lim->max_discard_sectors = 0;
 	lim->max_hw_discard_sectors = 0;
 	lim->discard_granularity = 0;
@@ -109,7 +59,6 @@
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
-	lim->cluster = 1;
 	lim->zoned = BLK_ZONED_NONE;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
@@ -135,46 +84,9 @@
 	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_same_sectors = UINT_MAX;
 	lim->max_write_zeroes_sectors = UINT_MAX;
+	lim->max_zone_append_sectors = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
-
-/**
- * blk_queue_make_request - define an alternate make_request function for a device
- * @q: the request queue for the device to be affected
- * @mfn: the alternate make_request function
- *
- * Description:
- *    The normal way for &struct bios to be passed to a device
- *    driver is for them to be collected into requests on a request
- *    queue, and then to allow the device driver to select requests
- *    off that queue when it is ready. This works well for many block
- *    devices. However some block devices (typically virtual devices
- *    such as md or lvm) do not benefit from the processing on the
- *    request queue, and are served best by having the requests passed
- *    directly to them. This can be achieved by providing a function
- *    to blk_queue_make_request().
- *
- * Caveat:
- *    The driver that does this *must* be able to deal appropriately
- *    with buffers in "highmemory". This can be accomplished by either calling
- *    kmap_atomic() to get a temporary kernel mapping, or by calling
- *    blk_queue_bounce() to create a buffer in normal memory.
- **/
-void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
-{
-	/*
-	 * set defaults
-	 */
-	q->nr_requests = BLKDEV_MAX_RQ;
-
-	q->make_request_fn = mfn;
-	blk_queue_dma_alignment(q, 511);
-	blk_queue_congestion_threshold(q);
-	q->nr_batching = BLK_BATCH_REQ;
-
-	blk_set_default_limits(&q->limits);
-}
-EXPORT_SYMBOL(blk_queue_make_request);
 
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
@@ -260,15 +172,13 @@
  *
  * Description:
  *    If a driver doesn't want IOs to cross a given chunk size, it can set
- *    this limit and prevent merging across chunks. Note that the chunk size
- *    must currently be a power-of-2 in sectors. Also note that the block
- *    layer must accept a page worth of data at any offset. So if the
- *    crossing of chunks is a hard limitation in the driver, it must still be
- *    prepared to split single page bios.
+ *    this limit and prevent merging across chunks. Note that the block layer
+ *    must accept a page worth of data at any offset. So if the crossing of
+ *    chunks is a hard limitation in the driver, it must still be prepared
+ *    to split single page bios.
 **/
 void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
 {
-	BUG_ON(!is_power_of_2(chunk_sectors));
 	q->limits.chunk_sectors = chunk_sectors;
 }
 EXPORT_SYMBOL(blk_queue_chunk_sectors);
@@ -310,6 +220,33 @@
 	q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors;
 }
 EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
+
+/**
+ * blk_queue_max_zone_append_sectors - set max sectors for a single zone append
+ * @q: the request queue for the device
+ * @max_zone_append_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_zone_append_sectors(struct request_queue *q,
+		unsigned int max_zone_append_sectors)
+{
+	unsigned int max_sectors;
+
+	if (WARN_ON(!blk_queue_is_zoned(q)))
+		return;
+
+	max_sectors = min(q->limits.max_hw_sectors, max_zone_append_sectors);
+	max_sectors = min(q->limits.chunk_sectors, max_sectors);
+
+	/*
+	 * Signal eventual driver bugs resulting in the max_zone_append sectors limit
+	 * being 0 due to a 0 argument, the chunk_sectors limit (zone size) not set,
+	 * or the max_hw_sectors limit not set.
+	 */
+	WARN_ON(!max_sectors);
+
+	q->limits.max_zone_append_sectors = max_sectors;
+}
+EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
 
 /**
  * blk_queue_max_segments - set max hw segments for a request for this queue
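As a rough illustration (not part of the patch), a zoned driver would be expected to register its zone size and hardware transfer limit before calling the new helper, since blk_queue_max_zone_append_sectors() clamps against both. The function name and values below are hypothetical:

#include <linux/blkdev.h>

/* Hypothetical limits: 256 MiB zones, 1 MiB max hardware transfer. */
#define EXAMPLE_ZONE_SECTORS	(256 * 2048)	/* 512-byte sectors */
#define EXAMPLE_MAX_HW_SECTORS	2048

static void example_setup_zone_append(struct request_queue *q)
{
	/* The queue must already be marked zoned, or the WARN_ON() above fires. */
	blk_queue_chunk_sectors(q, EXAMPLE_ZONE_SECTORS);
	blk_queue_max_hw_sectors(q, EXAMPLE_MAX_HW_SECTORS);

	/*
	 * Ask for "as much as possible"; the helper stores
	 * min(max_hw_sectors, chunk_sectors, UINT_MAX) = 2048 sectors.
	 */
	blk_queue_max_zone_append_sectors(q, UINT_MAX);
}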
@@ -364,6 +301,9 @@
 		printk(KERN_INFO "%s: set to minimum %d\n",
 		       __func__, max_size);
 	}
+
+	/* see blk_queue_virt_boundary() for the explanation */
+	WARN_ON_ONCE(q->limits.virt_boundary_mask);
 
 	q->limits.max_segment_size = max_size;
 }
@@ -431,6 +371,19 @@
 	q->limits.misaligned = 0;
 }
 EXPORT_SYMBOL(blk_queue_alignment_offset);
+
+void blk_queue_update_readahead(struct request_queue *q)
+{
+	/*
+	 * For read-ahead of large files to be effective, we need to read ahead
+	 * at least twice the optimal I/O size.
+	 */
+	q->backing_dev_info->ra_pages =
+		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
+	q->backing_dev_info->io_pages =
+		queue_max_sectors(q) >> (PAGE_SHIFT - 9);
+}
+EXPORT_SYMBOL_GPL(blk_queue_update_readahead);
 
 /**
  * blk_limits_io_min - set minimum request size for a device
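To put numbers on the read-ahead formula above (hypothetical figures; VM_READAHEAD_PAGES is 128 KiB worth of pages, i.e. 32 with 4 KiB pages):

/*
 * Worked example for blk_queue_update_readahead(), assuming PAGE_SIZE = 4 KiB
 * and a device reporting io_opt = 1 MiB:
 *
 *	queue_io_opt(q) * 2 / PAGE_SIZE = 2 MiB / 4 KiB   = 512 pages
 *	VM_READAHEAD_PAGES              = 128 KiB / 4 KiB =  32 pages
 *
 *	ra_pages = max(512, 32) = 512 pages, i.e. a 2 MiB read-ahead window.
 *
 * A device that does not report an optimal I/O size (io_opt == 0) keeps the
 * default 128 KiB window, and io_pages is simply max_sectors expressed in
 * pages.
 */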
@@ -510,6 +463,8 @@
 void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 {
 	blk_limits_io_opt(&q->limits, opt);
+	q->backing_dev_info->ra_pages =
+		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
@@ -520,17 +475,6 @@
 		sectors = PAGE_SIZE >> SECTOR_SHIFT;
 	return sectors;
 }
-
-/**
- * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
- * @t: the stacking driver (top)
- * @b: the underlying device (bottom)
- **/
-void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
-{
-	blk_stack_limits(&t->limits, &b->limits, 0);
-}
-EXPORT_SYMBOL(blk_queue_stack_limits);
 
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
@@ -565,6 +509,8 @@
 					b->max_write_same_sectors);
 	t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
 					b->max_write_zeroes_sectors);
+	t->max_zone_append_sectors = min(t->max_zone_append_sectors,
+					b->max_zone_append_sectors);
 	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
 
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
@@ -610,7 +556,9 @@
 	t->io_min = max(t->io_min, b->io_min);
 	t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
 
-	t->cluster &= b->cluster;
+	/* Set non-power-of-2 compatible chunk_sectors boundary */
+	if (b->chunk_sectors)
+		t->chunk_sectors = gcd(t->chunk_sectors, b->chunk_sectors);
 
 	/* Physical block size a multiple of the logical block size? */
 	if (t->physical_block_size & (t->logical_block_size - 1)) {
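A hypothetical worked example of the gcd() rule introduced above (numbers are illustrative only):

/*
 * Stacking example for the gcd() rule:
 *
 *	top (t):    chunk_sectors = 256
 *	bottom (b): chunk_sectors = 192
 *
 *	t->chunk_sectors = gcd(256, 192) = 64
 *
 * Every chunk boundary of either device (a multiple of 256 or of 192 sectors)
 * falls on a multiple of 64, so splitting requests at 64-sector boundaries
 * guarantees that none crosses a boundary of either device. The old
 * min_not_zero(256, 192) = 192 does not divide 256 and so could not serve as
 * a common boundary; the result also no longer needs to be a power of two,
 * which is why the BUG_ON() in blk_queue_chunk_sectors() was dropped earlier
 * in this patch.
 */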
@@ -629,6 +577,13 @@
 	/* Optimal I/O a multiple of the physical block size? */
 	if (t->io_opt & (t->physical_block_size - 1)) {
 		t->io_opt = 0;
+		t->misaligned = 1;
+		ret = -1;
+	}
+
+	/* chunk_sectors a multiple of the physical block size? */
+	if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
+		t->chunk_sectors = 0;
 		t->misaligned = 1;
 		ret = -1;
 	}
@@ -675,35 +630,10 @@
 			t->discard_granularity;
 	}
 
-	if (b->chunk_sectors)
-		t->chunk_sectors = min_not_zero(t->chunk_sectors,
-						b->chunk_sectors);
-
+	t->zoned = max(t->zoned, b->zoned);
 	return ret;
 }
 EXPORT_SYMBOL(blk_stack_limits);
-
-/**
- * bdev_stack_limits - adjust queue limits for stacked drivers
- * @t: the stacking driver limits (top device)
- * @bdev: the component block_device (bottom)
- * @start: first data sector within component device
- *
- * Description:
- *    Merges queue limits for a top device and a block_device. Returns
- *    0 if alignment didn't change. Returns -1 if adding the bottom
- *    device caused misalignment.
- */
-int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
-		      sector_t start)
-{
-	struct request_queue *bq = bdev_get_queue(bdev);
-
-	start += get_start_sect(bdev);
-
-	return blk_stack_limits(t, &bq->limits, start);
-}
-EXPORT_SYMBOL(bdev_stack_limits);
 
 /**
  * disk_stack_limits - adjust queue limits for stacked drivers
@@ -720,7 +650,8 @@
 {
 	struct request_queue *t = disk->queue;
 
-	if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
+	if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
+			get_start_sect(bdev) + (offset >> 9)) < 0) {
 		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
 
 		disk_name(disk, 0, top);
@@ -730,26 +661,9 @@
 		       top, bottom);
 	}
 
-	t->backing_dev_info->io_pages =
-		t->limits.max_sectors >> (PAGE_SHIFT - 9);
+	blk_queue_update_readahead(disk->queue);
 }
 EXPORT_SYMBOL(disk_stack_limits);
-
-/**
- * blk_queue_dma_pad - set pad mask
- * @q: the request queue for the device
- * @mask: pad mask
- *
- * Set dma pad mask.
- *
- * Appending pad buffer to a request modifies the last entry of a
- * scatter list such that it includes the pad buffer.
- **/
-void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
-{
-	q->dma_pad_mask = mask;
-}
-EXPORT_SYMBOL(blk_queue_dma_pad);
 
 /**
  * blk_queue_update_dma_pad - update pad mask
@@ -767,43 +681,6 @@
 	q->dma_pad_mask = mask;
 }
 EXPORT_SYMBOL(blk_queue_update_dma_pad);
-
-/**
- * blk_queue_dma_drain - Set up a drain buffer for excess dma.
- * @q: the request queue for the device
- * @dma_drain_needed: fn which returns non-zero if drain is necessary
- * @buf: physically contiguous buffer
- * @size: size of the buffer in bytes
- *
- * Some devices have excess DMA problems and can't simply discard (or
- * zero fill) the unwanted piece of the transfer. They have to have a
- * real area of memory to transfer it into. The use case for this is
- * ATAPI devices in DMA mode. If the packet command causes a transfer
- * bigger than the transfer size some HBAs will lock up if there
- * aren't DMA elements to contain the excess transfer. What this API
- * does is adjust the queue so that the buf is always appended
- * silently to the scatterlist.
- *
- * Note: This routine adjusts max_hw_segments to make room for appending
- * the drain buffer. If you call blk_queue_max_segments() after calling
- * this routine, you must set the limit to one fewer than your device
- * can support otherwise there won't be room for the drain buffer.
- */
-int blk_queue_dma_drain(struct request_queue *q,
-			dma_drain_needed_fn *dma_drain_needed,
-			void *buf, unsigned int size)
-{
-	if (queue_max_segments(q) < 2)
-		return -EINVAL;
-	/* make room for appending the drain */
-	blk_queue_max_segments(q, queue_max_segments(q) - 1);
-	q->dma_drain_needed = dma_drain_needed;
-	q->dma_drain_buffer = buf;
-	q->dma_drain_size = size;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
 
 /**
  * blk_queue_segment_boundary - set boundary rules for segment merging
@@ -830,6 +707,15 @@
 void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask)
 {
 	q->limits.virt_boundary_mask = mask;
+
+	/*
+	 * Devices that require a virtual boundary do not support scatter/gather
+	 * I/O natively, but instead require a descriptor list entry for each
+	 * page (which might not be identical to the Linux PAGE_SIZE). Because
+	 * of that they are not limited by our notion of "segment size".
+	 */
+	if (mask)
+		q->limits.max_segment_size = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_queue_virt_boundary);
 
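For context, a hypothetical driver (not from the patch) for hardware that maps each data chunk onto a fixed-size descriptor entry would set the virtual boundary like this; after the call the segment-size limit is effectively gone, as the comment above explains:

#include <linux/blkdev.h>

/* Hypothetical: the device uses 4 KiB descriptor entries regardless of PAGE_SIZE. */
#define EXAMPLE_DEV_PAGE_SIZE	4096

static void example_set_virt_boundary(struct request_queue *q)
{
	/*
	 * Intermediate bio_vecs must start and end on a 4 KiB boundary so
	 * that each one maps onto a single descriptor entry; the helper
	 * raises max_segment_size to UINT_MAX.
	 */
	blk_queue_virt_boundary(q, EXAMPLE_DEV_PAGE_SIZE - 1);
}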
@@ -872,15 +758,6 @@
 }
 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
 
-void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
-{
-	if (queueable)
-		blk_queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q);
-	else
-		blk_queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q);
-}
-EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
-
 /**
  * blk_set_queue_depth - tell the block layer about the device queue depth
  * @q: the request queue for the device
@@ -890,7 +767,7 @@
 void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 	q->queue_depth = depth;
-	wbt_set_queue_depth(q, depth);
+	rq_qos_queue_depth_changed(q);
 }
 EXPORT_SYMBOL(blk_set_queue_depth);
 
@@ -904,21 +781,103 @@
  */
 void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
 {
-	spin_lock_irq(q->queue_lock);
 	if (wc)
-		queue_flag_set(QUEUE_FLAG_WC, q);
+		blk_queue_flag_set(QUEUE_FLAG_WC, q);
 	else
-		queue_flag_clear(QUEUE_FLAG_WC, q);
+		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
 	if (fua)
-		queue_flag_set(QUEUE_FLAG_FUA, q);
+		blk_queue_flag_set(QUEUE_FLAG_FUA, q);
 	else
-		queue_flag_clear(QUEUE_FLAG_FUA, q);
-	spin_unlock_irq(q->queue_lock);
+		blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
 
 	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 }
 EXPORT_SYMBOL_GPL(blk_queue_write_cache);
 
+/**
+ * blk_queue_required_elevator_features - Set a queue required elevator features
+ * @q: the request queue for the target device
+ * @features: Required elevator features OR'ed together
+ *
+ * Tell the block layer that for the device controlled through @q, the only
+ * elevators that can be used are those that implement at least the set of
+ * features specified by @features.
+ */
+void blk_queue_required_elevator_features(struct request_queue *q,
+					  unsigned int features)
+{
+	q->required_elevator_features = features;
+}
+EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features);
+
+/**
+ * blk_queue_can_use_dma_map_merging - configure queue for merging segments.
+ * @q: the request queue for the device
+ * @dev: the device pointer for dma
+ *
+ * Tell the block layer about merging the segments by dma map of @q.
+ */
+bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
+				       struct device *dev)
+{
+	unsigned long boundary = dma_get_merge_boundary(dev);
+
+	if (!boundary)
+		return false;
+
+	/* No need to update max_segment_size. see blk_queue_virt_boundary() */
+	blk_queue_virt_boundary(q, boundary);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
+
+/**
+ * blk_queue_set_zoned - configure a disk queue zoned model.
+ * @disk: the gendisk of the queue to configure
+ * @model: the zoned model to set
+ *
+ * Set the zoned model of the request queue of @disk according to @model.
+ * When @model is BLK_ZONED_HM (host managed), this should be called only
+ * if zoned block device support is enabled (CONFIG_BLK_DEV_ZONED option).
+ * If @model specifies BLK_ZONED_HA (host aware), the effective model used
+ * depends on CONFIG_BLK_DEV_ZONED settings and on the existence of partitions
+ * on the disk.
+ */
+void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
+{
+	switch (model) {
+	case BLK_ZONED_HM:
+		/*
+		 * Host managed devices are supported only if
+		 * CONFIG_BLK_DEV_ZONED is enabled.
+		 */
+		WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED));
+		break;
+	case BLK_ZONED_HA:
+		/*
+		 * Host aware devices can be treated either as regular block
+		 * devices (similar to drive managed devices) or as zoned block
+		 * devices to take advantage of the zone command set, similarly
+		 * to host managed devices. We try the latter if there are no
+		 * partitions and zoned block device support is enabled, else
+		 * we do nothing special as far as the block layer is concerned.
+		 */
+		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
+		    disk_has_partitions(disk))
+			model = BLK_ZONED_NONE;
+		break;
+	case BLK_ZONED_NONE:
+	default:
+		if (WARN_ON_ONCE(model != BLK_ZONED_NONE))
+			model = BLK_ZONED_NONE;
+		break;
+	}
+
+	disk->queue->limits.zoned = model;
+}
+EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
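Finally, a hypothetical probe-time sketch (names are illustrative, not from the patch) of how the new blk_queue_set_zoned() helper is meant to be used for a host-aware device:

#include <linux/blkdev.h>
#include <linux/genhd.h>

static void example_set_zoned_model(struct gendisk *disk)
{
	/*
	 * With CONFIG_BLK_DEV_ZONED=y and no partitions on the disk, the
	 * queue is left host aware (BLK_ZONED_HA); otherwise the helper
	 * quietly falls back to BLK_ZONED_NONE and the device is treated
	 * as a regular block device.
	 */
	blk_queue_set_zoned(disk, BLK_ZONED_HA);
}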