.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
---|
1 | 2 | /* |
---|
2 | 3 | * Zoned block device handling |
---|
3 | 4 | * |
---|
.. | .. |
---|
12 | 13 | #include <linux/module.h> |
---|
13 | 14 | #include <linux/rbtree.h> |
---|
14 | 15 | #include <linux/blkdev.h> |
---|
| 16 | +#include <linux/blk-mq.h> |
---|
| 17 | +#include <linux/mm.h> |
---|
| 18 | +#include <linux/vmalloc.h> |
---|
| 19 | +#include <linux/sched/mm.h> |
---|
| 20 | + |
---|
| 21 | +#include "blk.h" |
---|
| 22 | + |
---|
| 23 | +#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name |
---|
| 24 | +static const char *const zone_cond_name[] = { |
---|
| 25 | + ZONE_COND_NAME(NOT_WP), |
---|
| 26 | + ZONE_COND_NAME(EMPTY), |
---|
| 27 | + ZONE_COND_NAME(IMP_OPEN), |
---|
| 28 | + ZONE_COND_NAME(EXP_OPEN), |
---|
| 29 | + ZONE_COND_NAME(CLOSED), |
---|
| 30 | + ZONE_COND_NAME(READONLY), |
---|
| 31 | + ZONE_COND_NAME(FULL), |
---|
| 32 | + ZONE_COND_NAME(OFFLINE), |
---|
| 33 | +}; |
---|
| 34 | +#undef ZONE_COND_NAME |
---|
| 35 | + |
---|
| 36 | +/** |
---|
| 37 | + * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX. |
---|
| 38 | + * @zone_cond: BLK_ZONE_COND_XXX. |
---|
| 39 | + * |
---|
| 40 | + * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX |
---|
| 41 | + * into string format. Useful in the debugging and tracing zone conditions. For |
---|
| 42 | + * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN". |
---|
| 43 | + */ |
---|
| 44 | +const char *blk_zone_cond_str(enum blk_zone_cond zone_cond) |
---|
| 45 | +{ |
---|
| 46 | + static const char *zone_cond_str = "UNKNOWN"; |
---|
| 47 | + |
---|
| 48 | + if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond]) |
---|
| 49 | + zone_cond_str = zone_cond_name[zone_cond]; |
---|
| 50 | + |
---|
| 51 | + return zone_cond_str; |
---|
| 52 | +} |
---|
| 53 | +EXPORT_SYMBOL_GPL(blk_zone_cond_str); |
---|
15 | 54 | |
---|
16 | 55 | static inline sector_t blk_zone_start(struct request_queue *q, |
---|
17 | 56 | sector_t sector) |
---|
.. | .. |
---|
43 | 82 | } |
---|
44 | 83 | EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock); |
---|
45 | 84 | |
---|
| 85 | +bool blk_req_zone_write_trylock(struct request *rq) |
---|
| 86 | +{ |
---|
| 87 | + unsigned int zno = blk_rq_zone_no(rq); |
---|
| 88 | + |
---|
| 89 | + if (test_and_set_bit(zno, rq->q->seq_zones_wlock)) |
---|
| 90 | + return false; |
---|
| 91 | + |
---|
| 92 | + WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED); |
---|
| 93 | + rq->rq_flags |= RQF_ZONE_WRITE_LOCKED; |
---|
| 94 | + |
---|
| 95 | + return true; |
---|
| 96 | +} |
---|
| 97 | +EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock); |
---|
| 98 | + |
---|
46 | 99 | void __blk_req_zone_write_lock(struct request *rq) |
---|
47 | 100 | { |
---|
48 | 101 | if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq), |
---|
.. | .. |
---|
63 | 116 | } |
---|
64 | 117 | EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); |
---|
65 | 118 | |
---|
66 | | -/* |
---|
67 | | - * Check that a zone report belongs to the partition. |
---|
68 | | - * If yes, fix its start sector and write pointer, copy it in the |
---|
69 | | - * zone information array and return true. Return false otherwise. |
---|
| 119 | +/** |
---|
| 120 | + * blkdev_nr_zones - Get number of zones |
---|
| 121 | + * @disk: Target gendisk |
---|
| 122 | + * |
---|
| 123 | + * Return the total number of zones of a zoned block device. For a block |
---|
| 124 | + * device without zone capabilities, the number of zones is always 0. |
---|
70 | 125 | */ |
---|
71 | | -static bool blkdev_report_zone(struct block_device *bdev, |
---|
72 | | - struct blk_zone *rep, |
---|
73 | | - struct blk_zone *zone) |
---|
| 126 | +unsigned int blkdev_nr_zones(struct gendisk *disk) |
---|
74 | 127 | { |
---|
75 | | - sector_t offset = get_start_sect(bdev); |
---|
| 128 | + sector_t zone_sectors = blk_queue_zone_sectors(disk->queue); |
---|
76 | 129 | |
---|
77 | | - if (rep->start < offset) |
---|
78 | | - return false; |
---|
79 | | - |
---|
80 | | - rep->start -= offset; |
---|
81 | | - if (rep->start + rep->len > bdev->bd_part->nr_sects) |
---|
82 | | - return false; |
---|
83 | | - |
---|
84 | | - if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL) |
---|
85 | | - rep->wp = rep->start + rep->len; |
---|
86 | | - else |
---|
87 | | - rep->wp -= offset; |
---|
88 | | - memcpy(zone, rep, sizeof(struct blk_zone)); |
---|
89 | | - |
---|
90 | | - return true; |
---|
| 130 | + if (!blk_queue_is_zoned(disk->queue)) |
---|
| 131 | + return 0; |
---|
| 132 | + return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors); |
---|
91 | 133 | } |
---|
| 134 | +EXPORT_SYMBOL_GPL(blkdev_nr_zones); |
---|
92 | 135 | |
---|
93 | 136 | /** |
---|
94 | 137 | * blkdev_report_zones - Get zones information |
---|
95 | 138 | * @bdev: Target block device |
---|
96 | 139 | * @sector: Sector from which to report zones |
---|
97 | | - * @zones: Array of zone structures where to return the zones information |
---|
98 | | - * @nr_zones: Number of zone structures in the zone array |
---|
99 | | - * @gfp_mask: Memory allocation flags (for bio_alloc) |
---|
| 140 | + * @nr_zones: Maximum number of zones to report |
---|
| 141 | + * @cb: Callback function called for each reported zone |
---|
| 142 | + * @data: Private data for the callback |
---|
100 | 143 | * |
---|
101 | 144 | * Description: |
---|
102 | | - * Get zone information starting from the zone containing @sector. |
---|
103 | | - * The number of zone information reported may be less than the number |
---|
104 | | - * requested by @nr_zones. The number of zones actually reported is |
---|
105 | | - * returned in @nr_zones. |
---|
| 145 | + * Get zone information starting from the zone containing @sector for at most |
---|
| 146 | + * @nr_zones, and call @cb for each zone reported by the device. |
---|
| 147 | + * To report all zones in a device starting from @sector, the BLK_ALL_ZONES |
---|
| 148 | + * constant can be passed to @nr_zones. |
---|
| 149 | + * Returns the number of zones reported by the device, or a negative errno |
---|
| 150 | + * value in case of failure. |
---|
| 151 | + * |
---|
| 152 | + * Note: The caller must use memalloc_noXX_save/restore() calls to control |
---|
| 153 | + * memory allocations done within this function. |
---|
106 | 154 | */ |
---|
107 | | -int blkdev_report_zones(struct block_device *bdev, |
---|
108 | | - sector_t sector, |
---|
109 | | - struct blk_zone *zones, |
---|
110 | | - unsigned int *nr_zones, |
---|
111 | | - gfp_t gfp_mask) |
---|
| 155 | +int blkdev_report_zones(struct block_device *bdev, sector_t sector, |
---|
| 156 | + unsigned int nr_zones, report_zones_cb cb, void *data) |
---|
112 | 157 | { |
---|
113 | | - struct request_queue *q = bdev_get_queue(bdev); |
---|
114 | | - struct blk_zone_report_hdr *hdr; |
---|
115 | | - unsigned int nrz = *nr_zones; |
---|
116 | | - struct page *page; |
---|
117 | | - unsigned int nr_rep; |
---|
118 | | - size_t rep_bytes; |
---|
119 | | - unsigned int nr_pages; |
---|
120 | | - struct bio *bio; |
---|
121 | | - struct bio_vec *bv; |
---|
122 | | - unsigned int i, n, nz; |
---|
123 | | - unsigned int ofst; |
---|
124 | | - void *addr; |
---|
125 | | - int ret; |
---|
| 158 | + struct gendisk *disk = bdev->bd_disk; |
---|
| 159 | + sector_t capacity = get_capacity(disk); |
---|
126 | 160 | |
---|
127 | | - if (!q) |
---|
128 | | - return -ENXIO; |
---|
129 | | - |
---|
130 | | - if (!blk_queue_is_zoned(q)) |
---|
| 161 | + if (!blk_queue_is_zoned(bdev_get_queue(bdev)) || |
---|
| 162 | + WARN_ON_ONCE(!disk->fops->report_zones)) |
---|
131 | 163 | return -EOPNOTSUPP; |
---|
132 | 164 | |
---|
133 | | - if (!nrz) |
---|
| 165 | + if (!nr_zones || sector >= capacity) |
---|
134 | 166 | return 0; |
---|
135 | 167 | |
---|
136 | | - if (sector > bdev->bd_part->nr_sects) { |
---|
137 | | - *nr_zones = 0; |
---|
138 | | - return 0; |
---|
139 | | - } |
---|
140 | | - |
---|
141 | | - /* |
---|
142 | | - * The zone report has a header. So make room for it in the |
---|
143 | | - * payload. Also make sure that the report fits in a single BIO |
---|
144 | | - * that will not be split down the stack. |
---|
145 | | - */ |
---|
146 | | - rep_bytes = sizeof(struct blk_zone_report_hdr) + |
---|
147 | | - sizeof(struct blk_zone) * nrz; |
---|
148 | | - rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK; |
---|
149 | | - if (rep_bytes > (queue_max_sectors(q) << 9)) |
---|
150 | | - rep_bytes = queue_max_sectors(q) << 9; |
---|
151 | | - |
---|
152 | | - nr_pages = min_t(unsigned int, BIO_MAX_PAGES, |
---|
153 | | - rep_bytes >> PAGE_SHIFT); |
---|
154 | | - nr_pages = min_t(unsigned int, nr_pages, |
---|
155 | | - queue_max_segments(q)); |
---|
156 | | - |
---|
157 | | - bio = bio_alloc(gfp_mask, nr_pages); |
---|
158 | | - if (!bio) |
---|
159 | | - return -ENOMEM; |
---|
160 | | - |
---|
161 | | - bio_set_dev(bio, bdev); |
---|
162 | | - bio->bi_iter.bi_sector = blk_zone_start(q, sector); |
---|
163 | | - bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0); |
---|
164 | | - |
---|
165 | | - for (i = 0; i < nr_pages; i++) { |
---|
166 | | - page = alloc_page(gfp_mask); |
---|
167 | | - if (!page) { |
---|
168 | | - ret = -ENOMEM; |
---|
169 | | - goto out; |
---|
170 | | - } |
---|
171 | | - if (!bio_add_page(bio, page, PAGE_SIZE, 0)) { |
---|
172 | | - __free_page(page); |
---|
173 | | - break; |
---|
174 | | - } |
---|
175 | | - } |
---|
176 | | - |
---|
177 | | - if (i == 0) |
---|
178 | | - ret = -ENOMEM; |
---|
179 | | - else |
---|
180 | | - ret = submit_bio_wait(bio); |
---|
181 | | - if (ret) |
---|
182 | | - goto out; |
---|
183 | | - |
---|
184 | | - /* |
---|
185 | | - * Process the report result: skip the header and go through the |
---|
186 | | - * reported zones to fixup and fixup the zone information for |
---|
187 | | - * partitions. At the same time, return the zone information into |
---|
188 | | - * the zone array. |
---|
189 | | - */ |
---|
190 | | - n = 0; |
---|
191 | | - nz = 0; |
---|
192 | | - nr_rep = 0; |
---|
193 | | - bio_for_each_segment_all(bv, bio, i) { |
---|
194 | | - |
---|
195 | | - if (!bv->bv_page) |
---|
196 | | - break; |
---|
197 | | - |
---|
198 | | - addr = kmap_atomic(bv->bv_page); |
---|
199 | | - |
---|
200 | | - /* Get header in the first page */ |
---|
201 | | - ofst = 0; |
---|
202 | | - if (!nr_rep) { |
---|
203 | | - hdr = addr; |
---|
204 | | - nr_rep = hdr->nr_zones; |
---|
205 | | - ofst = sizeof(struct blk_zone_report_hdr); |
---|
206 | | - } |
---|
207 | | - |
---|
208 | | - /* Fixup and report zones */ |
---|
209 | | - while (ofst < bv->bv_len && |
---|
210 | | - n < nr_rep && nz < nrz) { |
---|
211 | | - if (blkdev_report_zone(bdev, addr + ofst, &zones[nz])) |
---|
212 | | - nz++; |
---|
213 | | - ofst += sizeof(struct blk_zone); |
---|
214 | | - n++; |
---|
215 | | - } |
---|
216 | | - |
---|
217 | | - kunmap_atomic(addr); |
---|
218 | | - |
---|
219 | | - if (n >= nr_rep || nz >= nrz) |
---|
220 | | - break; |
---|
221 | | - |
---|
222 | | - } |
---|
223 | | - |
---|
224 | | - *nr_zones = nz; |
---|
225 | | -out: |
---|
226 | | - bio_for_each_segment_all(bv, bio, i) |
---|
227 | | - __free_page(bv->bv_page); |
---|
228 | | - bio_put(bio); |
---|
229 | | - |
---|
230 | | - return ret; |
---|
| 168 | + return disk->fops->report_zones(disk, sector, nr_zones, cb, data); |
---|
231 | 169 | } |
---|
232 | 170 | EXPORT_SYMBOL_GPL(blkdev_report_zones); |
---|
233 | 171 | |
---|
| 172 | +static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, |
---|
| 173 | + sector_t sector, |
---|
| 174 | + sector_t nr_sectors) |
---|
| 175 | +{ |
---|
| 176 | + if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) |
---|
| 177 | + return false; |
---|
| 178 | + |
---|
| 179 | + /* |
---|
| 180 | + * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors |
---|
| 181 | + * of the applicable zone range is the entire disk. |
---|
| 182 | + */ |
---|
| 183 | + return !sector && nr_sectors == get_capacity(bdev->bd_disk); |
---|
| 184 | +} |
---|
| 185 | + |
---|
234 | 186 | /** |
---|
235 | | - * blkdev_reset_zones - Reset zones write pointer |
---|
| 187 | + * blkdev_zone_mgmt - Execute a zone management operation on a range of zones |
---|
236 | 188 | * @bdev: Target block device |
---|
237 | | - * @sector: Start sector of the first zone to reset |
---|
238 | | - * @nr_sectors: Number of sectors, at least the length of one zone |
---|
| 189 | + * @op: Operation to be performed on the zones |
---|
| 190 | + * @sector: Start sector of the first zone to operate on |
---|
| 191 | + * @nr_sectors: Number of sectors, should be at least the length of one zone and |
---|
| 192 | + * must be zone size aligned. |
---|
239 | 193 | * @gfp_mask: Memory allocation flags (for bio_alloc) |
---|
240 | 194 | * |
---|
241 | 195 | * Description: |
---|
242 | | - * Reset the write pointer of the zones contained in the range |
---|
| 196 | + * Perform the specified operation on the range of zones specified by |
---|
243 | 197 | * @sector..@sector+@nr_sectors. Specifying the entire disk sector range |
---|
244 | 198 | * is valid, but the specified range should not contain conventional zones. |
---|
| 199 | + * The operation to execute on each zone can be a zone reset, open, close |
---|
| 200 | + * or finish request. |
---|
245 | 201 | */ |
---|
246 | | -int blkdev_reset_zones(struct block_device *bdev, |
---|
247 | | - sector_t sector, sector_t nr_sectors, |
---|
248 | | - gfp_t gfp_mask) |
---|
| 202 | +int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, |
---|
| 203 | + sector_t sector, sector_t nr_sectors, |
---|
| 204 | + gfp_t gfp_mask) |
---|
249 | 205 | { |
---|
250 | 206 | struct request_queue *q = bdev_get_queue(bdev); |
---|
251 | | - sector_t zone_sectors; |
---|
| 207 | + sector_t zone_sectors = blk_queue_zone_sectors(q); |
---|
| 208 | + sector_t capacity = get_capacity(bdev->bd_disk); |
---|
252 | 209 | sector_t end_sector = sector + nr_sectors; |
---|
253 | | - struct bio *bio; |
---|
| 210 | + struct bio *bio = NULL; |
---|
254 | 211 | int ret; |
---|
255 | | - |
---|
256 | | - if (!q) |
---|
257 | | - return -ENXIO; |
---|
258 | 212 | |
---|
259 | 213 | if (!blk_queue_is_zoned(q)) |
---|
260 | 214 | return -EOPNOTSUPP; |
---|
261 | 215 | |
---|
262 | | - if (end_sector > bdev->bd_part->nr_sects) |
---|
| 216 | + if (bdev_read_only(bdev)) |
---|
| 217 | + return -EPERM; |
---|
| 218 | + |
---|
| 219 | + if (!op_is_zone_mgmt(op)) |
---|
| 220 | + return -EOPNOTSUPP; |
---|
| 221 | + |
---|
| 222 | + if (end_sector <= sector || end_sector > capacity) |
---|
263 | 223 | /* Out of range */ |
---|
264 | 224 | return -EINVAL; |
---|
265 | 225 | |
---|
266 | 226 | /* Check alignment (handle eventual smaller last zone) */ |
---|
267 | | - zone_sectors = blk_queue_zone_sectors(q); |
---|
268 | 227 | if (sector & (zone_sectors - 1)) |
---|
269 | 228 | return -EINVAL; |
---|
270 | 229 | |
---|
271 | | - if ((nr_sectors & (zone_sectors - 1)) && |
---|
272 | | - end_sector != bdev->bd_part->nr_sects) |
---|
| 230 | + if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity) |
---|
273 | 231 | return -EINVAL; |
---|
274 | 232 | |
---|
275 | 233 | while (sector < end_sector) { |
---|
276 | | - |
---|
277 | | - bio = bio_alloc(gfp_mask, 0); |
---|
278 | | - bio->bi_iter.bi_sector = sector; |
---|
| 234 | + bio = blk_next_bio(bio, 0, gfp_mask); |
---|
279 | 235 | bio_set_dev(bio, bdev); |
---|
280 | | - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); |
---|
281 | 236 | |
---|
282 | | - ret = submit_bio_wait(bio); |
---|
283 | | - bio_put(bio); |
---|
| 237 | + /* |
---|
| 238 | + * Special case for the zone reset operation that reset all |
---|
| 239 | + * zones, this is useful for applications like mkfs. |
---|
| 240 | + */ |
---|
| 241 | + if (op == REQ_OP_ZONE_RESET && |
---|
| 242 | + blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { |
---|
| 243 | + bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; |
---|
| 244 | + break; |
---|
| 245 | + } |
---|
284 | 246 | |
---|
285 | | - if (ret) |
---|
286 | | - return ret; |
---|
287 | | - |
---|
| 247 | + bio->bi_opf = op | REQ_SYNC; |
---|
| 248 | + bio->bi_iter.bi_sector = sector; |
---|
288 | 249 | sector += zone_sectors; |
---|
289 | 250 | |
---|
290 | 251 | /* This may take a while, so be nice to others */ |
---|
291 | 252 | cond_resched(); |
---|
292 | | - |
---|
293 | 253 | } |
---|
294 | 254 | |
---|
| 255 | + ret = submit_bio_wait(bio); |
---|
| 256 | + bio_put(bio); |
---|
| 257 | + |
---|
| 258 | + return ret; |
---|
| 259 | +} |
---|
| 260 | +EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); |
---|
| 261 | + |
---|
| 262 | +struct zone_report_args { |
---|
| 263 | + struct blk_zone __user *zones; |
---|
| 264 | +}; |
---|
| 265 | + |
---|
| 266 | +static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, |
---|
| 267 | + void *data) |
---|
| 268 | +{ |
---|
| 269 | + struct zone_report_args *args = data; |
---|
| 270 | + |
---|
| 271 | + if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) |
---|
| 272 | + return -EFAULT; |
---|
295 | 273 | return 0; |
---|
296 | 274 | } |
---|
297 | | -EXPORT_SYMBOL_GPL(blkdev_reset_zones); |
---|
298 | 275 | |
---|
299 | 276 | /* |
---|
300 | 277 | * BLKREPORTZONE ioctl processing. |
---|
.. | .. |
---|
304 | 281 | unsigned int cmd, unsigned long arg) |
---|
305 | 282 | { |
---|
306 | 283 | void __user *argp = (void __user *)arg; |
---|
| 284 | + struct zone_report_args args; |
---|
307 | 285 | struct request_queue *q; |
---|
308 | 286 | struct blk_zone_report rep; |
---|
309 | | - struct blk_zone *zones; |
---|
310 | 287 | int ret; |
---|
311 | 288 | |
---|
312 | 289 | if (!argp) |
---|
.. | .. |
---|
325 | 302 | if (!rep.nr_zones) |
---|
326 | 303 | return -EINVAL; |
---|
327 | 304 | |
---|
328 | | - if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone)) |
---|
329 | | - return -ERANGE; |
---|
| 305 | + args.zones = argp + sizeof(struct blk_zone_report); |
---|
| 306 | + ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, |
---|
| 307 | + blkdev_copy_zone_to_user, &args); |
---|
| 308 | + if (ret < 0) |
---|
| 309 | + return ret; |
---|
330 | 310 | |
---|
331 | | - zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone), |
---|
332 | | - GFP_KERNEL | __GFP_ZERO); |
---|
333 | | - if (!zones) |
---|
334 | | - return -ENOMEM; |
---|
| 311 | + rep.nr_zones = ret; |
---|
| 312 | + rep.flags = BLK_ZONE_REP_CAPACITY; |
---|
| 313 | + if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) |
---|
| 314 | + return -EFAULT; |
---|
| 315 | + return 0; |
---|
| 316 | +} |
---|
335 | 317 | |
---|
336 | | - ret = blkdev_report_zones(bdev, rep.sector, |
---|
337 | | - zones, &rep.nr_zones, |
---|
338 | | - GFP_KERNEL); |
---|
339 | | - if (ret) |
---|
340 | | - goto out; |
---|
| 318 | +static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode, |
---|
| 319 | + const struct blk_zone_range *zrange) |
---|
| 320 | +{ |
---|
| 321 | + loff_t start, end; |
---|
341 | 322 | |
---|
342 | | - if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) { |
---|
343 | | - ret = -EFAULT; |
---|
344 | | - goto out; |
---|
345 | | - } |
---|
| 323 | + if (zrange->sector + zrange->nr_sectors <= zrange->sector || |
---|
| 324 | + zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk)) |
---|
| 325 | + /* Out of range */ |
---|
| 326 | + return -EINVAL; |
---|
346 | 327 | |
---|
347 | | - if (rep.nr_zones) { |
---|
348 | | - if (copy_to_user(argp + sizeof(struct blk_zone_report), zones, |
---|
349 | | - sizeof(struct blk_zone) * rep.nr_zones)) |
---|
350 | | - ret = -EFAULT; |
---|
351 | | - } |
---|
| 328 | + start = zrange->sector << SECTOR_SHIFT; |
---|
| 329 | + end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1; |
---|
352 | 330 | |
---|
353 | | - out: |
---|
354 | | - kvfree(zones); |
---|
355 | | - |
---|
356 | | - return ret; |
---|
| 331 | + return truncate_bdev_range(bdev, mode, start, end); |
---|
357 | 332 | } |
---|
358 | 333 | |
---|
359 | 334 | /* |
---|
360 | | - * BLKRESETZONE ioctl processing. |
---|
| 335 | + * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. |
---|
361 | 336 | * Called from blkdev_ioctl. |
---|
362 | 337 | */ |
---|
363 | | -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, |
---|
364 | | - unsigned int cmd, unsigned long arg) |
---|
| 338 | +int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, |
---|
| 339 | + unsigned int cmd, unsigned long arg) |
---|
365 | 340 | { |
---|
366 | 341 | void __user *argp = (void __user *)arg; |
---|
367 | 342 | struct request_queue *q; |
---|
368 | 343 | struct blk_zone_range zrange; |
---|
| 344 | + enum req_opf op; |
---|
| 345 | + int ret; |
---|
369 | 346 | |
---|
370 | 347 | if (!argp) |
---|
371 | 348 | return -EINVAL; |
---|
.. | .. |
---|
383 | 360 | if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) |
---|
384 | 361 | return -EFAULT; |
---|
385 | 362 | |
---|
386 | | - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors, |
---|
387 | | - GFP_KERNEL); |
---|
| 363 | + switch (cmd) { |
---|
| 364 | + case BLKRESETZONE: |
---|
| 365 | + op = REQ_OP_ZONE_RESET; |
---|
| 366 | + |
---|
| 367 | + /* Invalidate the page cache, including dirty pages. */ |
---|
| 368 | + ret = blkdev_truncate_zone_range(bdev, mode, &zrange); |
---|
| 369 | + if (ret) |
---|
| 370 | + return ret; |
---|
| 371 | + break; |
---|
| 372 | + case BLKOPENZONE: |
---|
| 373 | + op = REQ_OP_ZONE_OPEN; |
---|
| 374 | + break; |
---|
| 375 | + case BLKCLOSEZONE: |
---|
| 376 | + op = REQ_OP_ZONE_CLOSE; |
---|
| 377 | + break; |
---|
| 378 | + case BLKFINISHZONE: |
---|
| 379 | + op = REQ_OP_ZONE_FINISH; |
---|
| 380 | + break; |
---|
| 381 | + default: |
---|
| 382 | + return -ENOTTY; |
---|
| 383 | + } |
---|
| 384 | + |
---|
| 385 | + ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, |
---|
| 386 | + GFP_KERNEL); |
---|
| 387 | + |
---|
| 388 | + /* |
---|
| 389 | + * Invalidate the page cache again for zone reset: writes can only be |
---|
| 390 | + * direct for zoned devices so concurrent writes would not add any page |
---|
| 391 | + * to the page cache after/during reset. The page cache may be filled |
---|
| 392 | + * again due to concurrent reads though and dropping the pages for |
---|
| 393 | + * these is fine. |
---|
| 394 | + */ |
---|
| 395 | + if (!ret && cmd == BLKRESETZONE) |
---|
| 396 | + ret = blkdev_truncate_zone_range(bdev, mode, &zrange); |
---|
| 397 | + |
---|
| 398 | + return ret; |
---|
388 | 399 | } |
---|
| 400 | + |
---|
| 401 | +static inline unsigned long *blk_alloc_zone_bitmap(int node, |
---|
| 402 | + unsigned int nr_zones) |
---|
| 403 | +{ |
---|
| 404 | + return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), |
---|
| 405 | + GFP_NOIO, node); |
---|
| 406 | +} |
---|
| 407 | + |
---|
| 408 | +void blk_queue_free_zone_bitmaps(struct request_queue *q) |
---|
| 409 | +{ |
---|
| 410 | + kfree(q->conv_zones_bitmap); |
---|
| 411 | + q->conv_zones_bitmap = NULL; |
---|
| 412 | + kfree(q->seq_zones_wlock); |
---|
| 413 | + q->seq_zones_wlock = NULL; |
---|
| 414 | +} |
---|
| 415 | + |
---|
| 416 | +struct blk_revalidate_zone_args { |
---|
| 417 | + struct gendisk *disk; |
---|
| 418 | + unsigned long *conv_zones_bitmap; |
---|
| 419 | + unsigned long *seq_zones_wlock; |
---|
| 420 | + unsigned int nr_zones; |
---|
| 421 | + sector_t zone_sectors; |
---|
| 422 | + sector_t sector; |
---|
| 423 | +}; |
---|
| 424 | + |
---|
| 425 | +/* |
---|
| 426 | + * Helper function to check the validity of zones of a zoned block device. |
---|
| 427 | + */ |
---|
| 428 | +static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, |
---|
| 429 | + void *data) |
---|
| 430 | +{ |
---|
| 431 | + struct blk_revalidate_zone_args *args = data; |
---|
| 432 | + struct gendisk *disk = args->disk; |
---|
| 433 | + struct request_queue *q = disk->queue; |
---|
| 434 | + sector_t capacity = get_capacity(disk); |
---|
| 435 | + |
---|
| 436 | + /* |
---|
| 437 | + * All zones must have the same size, with the exception on an eventual |
---|
| 438 | + * smaller last zone. |
---|
| 439 | + */ |
---|
| 440 | + if (zone->start == 0) { |
---|
| 441 | + if (zone->len == 0 || !is_power_of_2(zone->len)) { |
---|
| 442 | + pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n", |
---|
| 443 | + disk->disk_name, zone->len); |
---|
| 444 | + return -ENODEV; |
---|
| 445 | + } |
---|
| 446 | + |
---|
| 447 | + args->zone_sectors = zone->len; |
---|
| 448 | + args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len); |
---|
| 449 | + } else if (zone->start + args->zone_sectors < capacity) { |
---|
| 450 | + if (zone->len != args->zone_sectors) { |
---|
| 451 | + pr_warn("%s: Invalid zoned device with non constant zone size\n", |
---|
| 452 | + disk->disk_name); |
---|
| 453 | + return -ENODEV; |
---|
| 454 | + } |
---|
| 455 | + } else { |
---|
| 456 | + if (zone->len > args->zone_sectors) { |
---|
| 457 | + pr_warn("%s: Invalid zoned device with larger last zone size\n", |
---|
| 458 | + disk->disk_name); |
---|
| 459 | + return -ENODEV; |
---|
| 460 | + } |
---|
| 461 | + } |
---|
| 462 | + |
---|
| 463 | + /* Check for holes in the zone report */ |
---|
| 464 | + if (zone->start != args->sector) { |
---|
| 465 | + pr_warn("%s: Zone gap at sectors %llu..%llu\n", |
---|
| 466 | + disk->disk_name, args->sector, zone->start); |
---|
| 467 | + return -ENODEV; |
---|
| 468 | + } |
---|
| 469 | + |
---|
| 470 | + /* Check zone type */ |
---|
| 471 | + switch (zone->type) { |
---|
| 472 | + case BLK_ZONE_TYPE_CONVENTIONAL: |
---|
| 473 | + if (!args->conv_zones_bitmap) { |
---|
| 474 | + args->conv_zones_bitmap = |
---|
| 475 | + blk_alloc_zone_bitmap(q->node, args->nr_zones); |
---|
| 476 | + if (!args->conv_zones_bitmap) |
---|
| 477 | + return -ENOMEM; |
---|
| 478 | + } |
---|
| 479 | + set_bit(idx, args->conv_zones_bitmap); |
---|
| 480 | + break; |
---|
| 481 | + case BLK_ZONE_TYPE_SEQWRITE_REQ: |
---|
| 482 | + case BLK_ZONE_TYPE_SEQWRITE_PREF: |
---|
| 483 | + if (!args->seq_zones_wlock) { |
---|
| 484 | + args->seq_zones_wlock = |
---|
| 485 | + blk_alloc_zone_bitmap(q->node, args->nr_zones); |
---|
| 486 | + if (!args->seq_zones_wlock) |
---|
| 487 | + return -ENOMEM; |
---|
| 488 | + } |
---|
| 489 | + break; |
---|
| 490 | + default: |
---|
| 491 | + pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", |
---|
| 492 | + disk->disk_name, (int)zone->type, zone->start); |
---|
| 493 | + return -ENODEV; |
---|
| 494 | + } |
---|
| 495 | + |
---|
| 496 | + args->sector += zone->len; |
---|
| 497 | + return 0; |
---|
| 498 | +} |
---|
| 499 | + |
---|
| 500 | +/** |
---|
| 501 | + * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps |
---|
| 502 | + * @disk: Target disk |
---|
| 503 | + * @update_driver_data: Callback to update driver data on the frozen disk |
---|
| 504 | + * |
---|
| 505 | + * Helper function for low-level device drivers to (re) allocate and initialize |
---|
| 506 | + * a disk request queue zone bitmaps. This functions should normally be called |
---|
| 507 | + * within the disk ->revalidate method for blk-mq based drivers. For BIO based |
---|
| 508 | + * drivers only q->nr_zones needs to be updated so that the sysfs exposed value |
---|
| 509 | + * is correct. |
---|
| 510 | + * If the @update_driver_data callback function is not NULL, the callback is |
---|
| 511 | + * executed with the device request queue frozen after all zones have been |
---|
| 512 | + * checked. |
---|
| 513 | + */ |
---|
| 514 | +int blk_revalidate_disk_zones(struct gendisk *disk, |
---|
| 515 | + void (*update_driver_data)(struct gendisk *disk)) |
---|
| 516 | +{ |
---|
| 517 | + struct request_queue *q = disk->queue; |
---|
| 518 | + struct blk_revalidate_zone_args args = { |
---|
| 519 | + .disk = disk, |
---|
| 520 | + }; |
---|
| 521 | + unsigned int noio_flag; |
---|
| 522 | + int ret; |
---|
| 523 | + |
---|
| 524 | + if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) |
---|
| 525 | + return -EIO; |
---|
| 526 | + if (WARN_ON_ONCE(!queue_is_mq(q))) |
---|
| 527 | + return -EIO; |
---|
| 528 | + |
---|
| 529 | + if (!get_capacity(disk)) |
---|
| 530 | + return -EIO; |
---|
| 531 | + |
---|
| 532 | + /* |
---|
| 533 | + * Ensure that all memory allocations in this context are done as if |
---|
| 534 | + * GFP_NOIO was specified. |
---|
| 535 | + */ |
---|
| 536 | + noio_flag = memalloc_noio_save(); |
---|
| 537 | + ret = disk->fops->report_zones(disk, 0, UINT_MAX, |
---|
| 538 | + blk_revalidate_zone_cb, &args); |
---|
| 539 | + memalloc_noio_restore(noio_flag); |
---|
| 540 | + |
---|
| 541 | + /* |
---|
| 542 | + * Install the new bitmaps and update nr_zones only once the queue is |
---|
| 543 | + * stopped and all I/Os are completed (i.e. a scheduler is not |
---|
| 544 | + * referencing the bitmaps). |
---|
| 545 | + */ |
---|
| 546 | + blk_mq_freeze_queue(q); |
---|
| 547 | + if (ret >= 0) { |
---|
| 548 | + blk_queue_chunk_sectors(q, args.zone_sectors); |
---|
| 549 | + q->nr_zones = args.nr_zones; |
---|
| 550 | + swap(q->seq_zones_wlock, args.seq_zones_wlock); |
---|
| 551 | + swap(q->conv_zones_bitmap, args.conv_zones_bitmap); |
---|
| 552 | + if (update_driver_data) |
---|
| 553 | + update_driver_data(disk); |
---|
| 554 | + ret = 0; |
---|
| 555 | + } else { |
---|
| 556 | + pr_warn("%s: failed to revalidate zones\n", disk->disk_name); |
---|
| 557 | + blk_queue_free_zone_bitmaps(q); |
---|
| 558 | + } |
---|
| 559 | + blk_mq_unfreeze_queue(q); |
---|
| 560 | + |
---|
| 561 | + kfree(args.seq_zones_wlock); |
---|
| 562 | + kfree(args.conv_zones_bitmap); |
---|
| 563 | + return ret; |
---|
| 564 | +} |
---|
| 565 | +EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); |
---|