hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/scsi/sd_zbc.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * SCSI Zoned Block commands
34 *
....@@ -5,24 +6,12 @@
56 * Written by: Hannes Reinecke <hare@suse.de>
67 * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
78 * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com>
8
- *
9
- * This program is free software; you can redistribute it and/or
10
- * modify it under the terms of the GNU General Public License version
11
- * 2 as published by the Free Software Foundation.
12
- *
13
- * This program is distributed in the hope that it will be useful, but
14
- * WITHOUT ANY WARRANTY; without even the implied warranty of
15
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- * General Public License for more details.
17
- *
18
- * You should have received a copy of the GNU General Public License
19
- * along with this program; see the file COPYING. If not, write to
20
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
21
- * USA.
22
- *
239 */
2410
2511 #include <linux/blkdev.h>
12
+#include <linux/vmalloc.h>
13
+#include <linux/sched/mm.h>
14
+#include <linux/mutex.h>
2615
2716 #include <asm/unaligned.h>
2817
....@@ -31,47 +20,79 @@
3120
3221 #include "sd.h"
3322
34
-/**
35
- * sd_zbc_parse_report - Convert a zone descriptor to a struct blk_zone,
36
- * @sdkp: The disk the report originated from
37
- * @buf: Address of the report zone descriptor
38
- * @zone: the destination zone structure
39
- *
40
- * All LBA sized values are converted to 512B sectors unit.
41
- */
42
-static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
43
- struct blk_zone *zone)
23
+static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
24
+{
25
+ if (zone->type == ZBC_ZONE_TYPE_CONV)
26
+ return 0;
27
+
28
+ switch (zone->cond) {
29
+ case BLK_ZONE_COND_IMP_OPEN:
30
+ case BLK_ZONE_COND_EXP_OPEN:
31
+ case BLK_ZONE_COND_CLOSED:
32
+ return zone->wp - zone->start;
33
+ case BLK_ZONE_COND_FULL:
34
+ return zone->len;
35
+ case BLK_ZONE_COND_EMPTY:
36
+ case BLK_ZONE_COND_OFFLINE:
37
+ case BLK_ZONE_COND_READONLY:
38
+ default:
39
+ /*
40
+ * Offline and read-only zones do not have a valid
41
+ * write pointer. Use 0 as for an empty zone.
42
+ */
43
+ return 0;
44
+ }
45
+}
46
+
47
+static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
48
+ unsigned int idx, report_zones_cb cb, void *data)
4449 {
4550 struct scsi_device *sdp = sdkp->device;
51
+ struct blk_zone zone = { 0 };
52
+ int ret;
4653
47
- memset(zone, 0, sizeof(struct blk_zone));
48
-
49
- zone->type = buf[0] & 0x0f;
50
- zone->cond = (buf[1] >> 4) & 0xf;
54
+ zone.type = buf[0] & 0x0f;
55
+ zone.cond = (buf[1] >> 4) & 0xf;
5156 if (buf[1] & 0x01)
52
- zone->reset = 1;
57
+ zone.reset = 1;
5358 if (buf[1] & 0x02)
54
- zone->non_seq = 1;
59
+ zone.non_seq = 1;
5560
56
- zone->len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
57
- zone->start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
58
- zone->wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
59
- if (zone->type != ZBC_ZONE_TYPE_CONV &&
60
- zone->cond == ZBC_ZONE_COND_FULL)
61
- zone->wp = zone->start + zone->len;
61
+ zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
62
+ zone.capacity = zone.len;
63
+ zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
64
+ zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
65
+ if (zone.type != ZBC_ZONE_TYPE_CONV &&
66
+ zone.cond == ZBC_ZONE_COND_FULL)
67
+ zone.wp = zone.start + zone.len;
68
+
69
+ ret = cb(&zone, idx, data);
70
+ if (ret)
71
+ return ret;
72
+
73
+ if (sdkp->rev_wp_offset)
74
+ sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
75
+
76
+ return 0;
6277 }
6378
6479 /**
65
- * sd_zbc_report_zones - Issue a REPORT ZONES scsi command.
80
+ * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
6681 * @sdkp: The target disk
67
- * @buf: Buffer to use for the reply
82
+ * @buf: vmalloc-ed buffer to use for the reply
6883 * @buflen: the buffer size
6984 * @lba: Start LBA of the report
85
+ * @partial: Do partial report
7086 *
7187 * For internal use during device validation.
88
+ * Using partial=true can significantly speed up execution of a report zones
89
+ * command because the disk does not have to count all possible report matching
90
+ * zones and will only report the count of zones fitting in the command reply
91
+ * buffer.
7292 */
73
-static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
74
- unsigned int buflen, sector_t lba)
93
+static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
94
+ unsigned int buflen, sector_t lba,
95
+ bool partial)
7596 {
7697 struct scsi_device *sdp = sdkp->device;
7798 const int timeout = sdp->request_queue->rq_timeout;
....@@ -85,16 +106,19 @@
85106 cmd[1] = ZI_REPORT_ZONES;
86107 put_unaligned_be64(lba, &cmd[2]);
87108 put_unaligned_be32(buflen, &cmd[10]);
88
- memset(buf, 0, buflen);
109
+ if (partial)
110
+ cmd[14] = ZBC_REPORT_ZONE_PARTIAL;
89111
90112 result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
91113 buf, buflen, &sshdr,
92114 timeout, SD_MAX_RETRIES, NULL);
93115 if (result) {
94116 sd_printk(KERN_ERR, sdkp,
95
- "REPORT ZONES lba %llu failed with %d/%d\n",
96
- (unsigned long long)lba,
97
- host_byte(result), driver_byte(result));
117
+ "REPORT ZONES start lba %llu failed\n", lba);
118
+ sd_print_result(sdkp, "REPORT ZONES", result);
119
+ if (driver_byte(result) == DRIVER_SENSE &&
120
+ scsi_sense_valid(&sshdr))
121
+ sd_print_sense_hdr(sdkp, &sshdr);
98122 return -EIO;
99123 }
100124
....@@ -110,108 +134,51 @@
110134 }
111135
112136 /**
113
- * sd_zbc_setup_report_cmnd - Prepare a REPORT ZONES scsi command
114
- * @cmd: The command to setup
137
+ * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
138
+ * @sdkp: The target disk
139
+ * @nr_zones: Maximum number of zones to report
140
+ * @buflen: Size of the buffer allocated
115141 *
116
- * Call in sd_init_command() for a REQ_OP_ZONE_REPORT request.
117
- */
118
-int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd)
119
-{
120
- struct request *rq = cmd->request;
121
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
122
- sector_t lba, sector = blk_rq_pos(rq);
123
- unsigned int nr_bytes = blk_rq_bytes(rq);
124
- int ret;
125
-
126
- WARN_ON(nr_bytes == 0);
127
-
128
- if (!sd_is_zoned(sdkp))
129
- /* Not a zoned device */
130
- return BLKPREP_KILL;
131
-
132
- ret = scsi_init_io(cmd);
133
- if (ret != BLKPREP_OK)
134
- return ret;
135
-
136
- cmd->cmd_len = 16;
137
- memset(cmd->cmnd, 0, cmd->cmd_len);
138
- cmd->cmnd[0] = ZBC_IN;
139
- cmd->cmnd[1] = ZI_REPORT_ZONES;
140
- lba = sectors_to_logical(sdkp->device, sector);
141
- put_unaligned_be64(lba, &cmd->cmnd[2]);
142
- put_unaligned_be32(nr_bytes, &cmd->cmnd[10]);
143
- /* Do partial report for speeding things up */
144
- cmd->cmnd[14] = ZBC_REPORT_ZONE_PARTIAL;
145
-
146
- cmd->sc_data_direction = DMA_FROM_DEVICE;
147
- cmd->sdb.length = nr_bytes;
148
- cmd->transfersize = sdkp->device->sector_size;
149
- cmd->allowed = 0;
150
-
151
- return BLKPREP_OK;
152
-}
153
-
154
-/**
155
- * sd_zbc_report_zones_complete - Process a REPORT ZONES scsi command reply.
156
- * @scmd: The completed report zones command
157
- * @good_bytes: reply size in bytes
142
+ * Try to allocate a reply buffer for the number of requested zones.
143
+ * The size of the buffer allocated may be smaller than requested to
144
+ * satify the device constraint (max_hw_sectors, max_segments, etc).
158145 *
159
- * Convert all reported zone descriptors to struct blk_zone. The conversion
160
- * is done in-place, directly in the request specified sg buffer.
146
+ * Return the address of the allocated buffer and update @buflen with
147
+ * the size of the allocated buffer.
161148 */
162
-static void sd_zbc_report_zones_complete(struct scsi_cmnd *scmd,
163
- unsigned int good_bytes)
149
+static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
150
+ unsigned int nr_zones, size_t *buflen)
164151 {
165
- struct request *rq = scmd->request;
166
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
167
- struct sg_mapping_iter miter;
168
- struct blk_zone_report_hdr hdr;
169
- struct blk_zone zone;
170
- unsigned int offset, bytes = 0;
171
- unsigned long flags;
172
- u8 *buf;
152
+ struct request_queue *q = sdkp->disk->queue;
153
+ size_t bufsize;
154
+ void *buf;
173155
174
- if (good_bytes < 64)
175
- return;
156
+ /*
157
+ * Report zone buffer size should be at most 64B times the number of
158
+ * zones requested plus the 64B reply header, but should be aligned
159
+ * to SECTOR_SIZE for ATA devices.
160
+ * Make sure that this size does not exceed the hardware capabilities.
161
+ * Furthermore, since the report zone command cannot be split, make
162
+ * sure that the allocated buffer can always be mapped by limiting the
163
+ * number of pages allocated to the HBA max segments limit.
164
+ */
165
+ nr_zones = min(nr_zones, sdkp->nr_zones);
166
+ bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
167
+ bufsize = min_t(size_t, bufsize,
168
+ queue_max_hw_sectors(q) << SECTOR_SHIFT);
169
+ bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
176170
177
- memset(&hdr, 0, sizeof(struct blk_zone_report_hdr));
178
-
179
- sg_miter_start(&miter, scsi_sglist(scmd), scsi_sg_count(scmd),
180
- SG_MITER_TO_SG | SG_MITER_ATOMIC);
181
-
182
- local_irq_save(flags);
183
- while (sg_miter_next(&miter) && bytes < good_bytes) {
184
-
185
- buf = miter.addr;
186
- offset = 0;
187
-
188
- if (bytes == 0) {
189
- /* Set the report header */
190
- hdr.nr_zones = min_t(unsigned int,
191
- (good_bytes - 64) / 64,
192
- get_unaligned_be32(&buf[0]) / 64);
193
- memcpy(buf, &hdr, sizeof(struct blk_zone_report_hdr));
194
- offset += 64;
195
- bytes += 64;
171
+ while (bufsize >= SECTOR_SIZE) {
172
+ buf = __vmalloc(bufsize,
173
+ GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY);
174
+ if (buf) {
175
+ *buflen = bufsize;
176
+ return buf;
196177 }
197
-
198
- /* Parse zone descriptors */
199
- while (offset < miter.length && hdr.nr_zones) {
200
- WARN_ON(offset > miter.length);
201
- buf = miter.addr + offset;
202
- sd_zbc_parse_report(sdkp, buf, &zone);
203
- memcpy(buf, &zone, sizeof(struct blk_zone));
204
- offset += 64;
205
- bytes += 64;
206
- hdr.nr_zones--;
207
- }
208
-
209
- if (!hdr.nr_zones)
210
- break;
211
-
178
+ bufsize = rounddown(bufsize >> 1, SECTOR_SIZE);
212179 }
213
- sg_miter_stop(&miter);
214
- local_irq_restore(flags);
180
+
181
+ return NULL;
215182 }
216183
217184 /**
....@@ -223,42 +190,315 @@
223190 return logical_to_sectors(sdkp->device, sdkp->zone_blocks);
224191 }
225192
226
-/**
227
- * sd_zbc_setup_reset_cmnd - Prepare a RESET WRITE POINTER scsi command.
228
- * @cmd: the command to setup
229
- *
230
- * Called from sd_init_command() for a REQ_OP_ZONE_RESET request.
231
- */
232
-int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
193
+int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
194
+ unsigned int nr_zones, report_zones_cb cb, void *data)
195
+{
196
+ struct scsi_disk *sdkp = scsi_disk(disk);
197
+ sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity);
198
+ unsigned int nr, i;
199
+ unsigned char *buf;
200
+ size_t offset, buflen = 0;
201
+ int zone_idx = 0;
202
+ int ret;
203
+
204
+ if (!sd_is_zoned(sdkp))
205
+ /* Not a zoned device */
206
+ return -EOPNOTSUPP;
207
+
208
+ if (!capacity)
209
+ /* Device gone or invalid */
210
+ return -ENODEV;
211
+
212
+ buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
213
+ if (!buf)
214
+ return -ENOMEM;
215
+
216
+ while (zone_idx < nr_zones && sector < capacity) {
217
+ ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
218
+ sectors_to_logical(sdkp->device, sector), true);
219
+ if (ret)
220
+ goto out;
221
+
222
+ offset = 0;
223
+ nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64);
224
+ if (!nr)
225
+ break;
226
+
227
+ for (i = 0; i < nr && zone_idx < nr_zones; i++) {
228
+ offset += 64;
229
+ ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
230
+ cb, data);
231
+ if (ret)
232
+ goto out;
233
+ zone_idx++;
234
+ }
235
+
236
+ sector += sd_zbc_zone_sectors(sdkp) * i;
237
+ }
238
+
239
+ ret = zone_idx;
240
+out:
241
+ kvfree(buf);
242
+ return ret;
243
+}
244
+
245
+static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
233246 {
234247 struct request *rq = cmd->request;
235248 struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
236249 sector_t sector = blk_rq_pos(rq);
237
- sector_t block = sectors_to_logical(sdkp->device, sector);
238250
239251 if (!sd_is_zoned(sdkp))
240252 /* Not a zoned device */
241
- return BLKPREP_KILL;
253
+ return BLK_STS_IOERR;
242254
243255 if (sdkp->device->changed)
244
- return BLKPREP_KILL;
256
+ return BLK_STS_IOERR;
245257
246258 if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
247259 /* Unaligned request */
248
- return BLKPREP_KILL;
260
+ return BLK_STS_IOERR;
261
+
262
+ return BLK_STS_OK;
263
+}
264
+
265
+#define SD_ZBC_INVALID_WP_OFST (~0u)
266
+#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
267
+
268
+static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
269
+ void *data)
270
+{
271
+ struct scsi_disk *sdkp = data;
272
+
273
+ lockdep_assert_held(&sdkp->zones_wp_offset_lock);
274
+
275
+ sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
276
+
277
+ return 0;
278
+}
279
+
280
+static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
281
+{
282
+ struct scsi_disk *sdkp;
283
+ unsigned int zno;
284
+ int ret;
285
+
286
+ sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
287
+
288
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
289
+ for (zno = 0; zno < sdkp->nr_zones; zno++) {
290
+ if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
291
+ continue;
292
+
293
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
294
+ ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
295
+ SD_BUF_SIZE,
296
+ zno * sdkp->zone_blocks, true);
297
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
298
+ if (!ret)
299
+ sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
300
+ zno, sd_zbc_update_wp_offset_cb,
301
+ sdkp);
302
+ }
303
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
304
+
305
+ scsi_device_put(sdkp->device);
306
+}
307
+
308
+/**
309
+ * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
310
+ * @cmd: the command to setup
311
+ * @lba: the LBA to patch
312
+ * @nr_blocks: the number of LBAs to be written
313
+ *
314
+ * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
315
+ * @sd_zbc_prepare_zone_append() handles the necessary zone write locking and
316
+ * patching of the lba for an emulated ZONE_APPEND command.
317
+ *
318
+ * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
319
+ * schedule a REPORT ZONES command and return BLK_STS_DEV_RESOURCE.
320
+ */
321
+blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
322
+ unsigned int nr_blocks)
323
+{
324
+ struct request *rq = cmd->request;
325
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
326
+ unsigned int wp_offset, zno = blk_rq_zone_no(rq);
327
+ blk_status_t ret;
328
+
329
+ ret = sd_zbc_cmnd_checks(cmd);
330
+ if (ret != BLK_STS_OK)
331
+ return ret;
332
+
333
+ if (!blk_rq_zone_is_seq(rq))
334
+ return BLK_STS_IOERR;
335
+
336
+ /* Unlock of the write lock will happen in sd_zbc_complete() */
337
+ if (!blk_req_zone_write_trylock(rq))
338
+ return BLK_STS_ZONE_RESOURCE;
339
+
340
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
341
+ wp_offset = sdkp->zones_wp_offset[zno];
342
+ switch (wp_offset) {
343
+ case SD_ZBC_INVALID_WP_OFST:
344
+ /*
345
+ * We are about to schedule work to update a zone write pointer
346
+ * offset, which will cause the zone append command to be
347
+ * requeued. So make sure that the scsi device does not go away
348
+ * while the work is being processed.
349
+ */
350
+ if (scsi_device_get(sdkp->device)) {
351
+ ret = BLK_STS_IOERR;
352
+ break;
353
+ }
354
+ sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
355
+ schedule_work(&sdkp->zone_wp_offset_work);
356
+ fallthrough;
357
+ case SD_ZBC_UPDATING_WP_OFST:
358
+ ret = BLK_STS_DEV_RESOURCE;
359
+ break;
360
+ default:
361
+ wp_offset = sectors_to_logical(sdkp->device, wp_offset);
362
+ if (wp_offset + nr_blocks > sdkp->zone_blocks) {
363
+ ret = BLK_STS_IOERR;
364
+ break;
365
+ }
366
+
367
+ *lba += wp_offset;
368
+ }
369
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
370
+ if (ret)
371
+ blk_req_zone_write_unlock(rq);
372
+ return ret;
373
+}
374
+
375
+/**
376
+ * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
377
+ * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
378
+ * @cmd: the command to setup
379
+ * @op: Operation to be performed
380
+ * @all: All zones control
381
+ *
382
+ * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
383
+ * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
384
+ */
385
+blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
386
+ unsigned char op, bool all)
387
+{
388
+ struct request *rq = cmd->request;
389
+ sector_t sector = blk_rq_pos(rq);
390
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
391
+ sector_t block = sectors_to_logical(sdkp->device, sector);
392
+ blk_status_t ret;
393
+
394
+ ret = sd_zbc_cmnd_checks(cmd);
395
+ if (ret != BLK_STS_OK)
396
+ return ret;
249397
250398 cmd->cmd_len = 16;
251399 memset(cmd->cmnd, 0, cmd->cmd_len);
252400 cmd->cmnd[0] = ZBC_OUT;
253
- cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
254
- put_unaligned_be64(block, &cmd->cmnd[2]);
401
+ cmd->cmnd[1] = op;
402
+ if (all)
403
+ cmd->cmnd[14] = 0x1;
404
+ else
405
+ put_unaligned_be64(block, &cmd->cmnd[2]);
255406
256407 rq->timeout = SD_TIMEOUT;
257408 cmd->sc_data_direction = DMA_NONE;
258409 cmd->transfersize = 0;
259410 cmd->allowed = 0;
260411
261
- return BLKPREP_OK;
412
+ return BLK_STS_OK;
413
+}
414
+
415
+static bool sd_zbc_need_zone_wp_update(struct request *rq)
416
+{
417
+ switch (req_op(rq)) {
418
+ case REQ_OP_ZONE_APPEND:
419
+ case REQ_OP_ZONE_FINISH:
420
+ case REQ_OP_ZONE_RESET:
421
+ case REQ_OP_ZONE_RESET_ALL:
422
+ return true;
423
+ case REQ_OP_WRITE:
424
+ case REQ_OP_WRITE_ZEROES:
425
+ case REQ_OP_WRITE_SAME:
426
+ return blk_rq_zone_is_seq(rq);
427
+ default:
428
+ return false;
429
+ }
430
+}
431
+
432
+/**
433
+ * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
434
+ * @cmd: Completed command
435
+ * @good_bytes: Command reply bytes
436
+ *
437
+ * Called from sd_zbc_complete() to handle the update of the cached zone write
438
+ * pointer value in case an update is needed.
439
+ */
440
+static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
441
+ unsigned int good_bytes)
442
+{
443
+ int result = cmd->result;
444
+ struct request *rq = cmd->request;
445
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
446
+ unsigned int zno = blk_rq_zone_no(rq);
447
+ enum req_opf op = req_op(rq);
448
+
449
+ /*
450
+ * If we got an error for a command that needs updating the write
451
+ * pointer offset cache, we must mark the zone wp offset entry as
452
+ * invalid to force an update from disk the next time a zone append
453
+ * command is issued.
454
+ */
455
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
456
+
457
+ if (result && op != REQ_OP_ZONE_RESET_ALL) {
458
+ if (op == REQ_OP_ZONE_APPEND) {
459
+ /* Force complete completion (no retry) */
460
+ good_bytes = 0;
461
+ scsi_set_resid(cmd, blk_rq_bytes(rq));
462
+ }
463
+
464
+ /*
465
+ * Force an update of the zone write pointer offset on
466
+ * the next zone append access.
467
+ */
468
+ if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
469
+ sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
470
+ goto unlock_wp_offset;
471
+ }
472
+
473
+ switch (op) {
474
+ case REQ_OP_ZONE_APPEND:
475
+ rq->__sector += sdkp->zones_wp_offset[zno];
476
+ fallthrough;
477
+ case REQ_OP_WRITE_ZEROES:
478
+ case REQ_OP_WRITE_SAME:
479
+ case REQ_OP_WRITE:
480
+ if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
481
+ sdkp->zones_wp_offset[zno] +=
482
+ good_bytes >> SECTOR_SHIFT;
483
+ break;
484
+ case REQ_OP_ZONE_RESET:
485
+ sdkp->zones_wp_offset[zno] = 0;
486
+ break;
487
+ case REQ_OP_ZONE_FINISH:
488
+ sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
489
+ break;
490
+ case REQ_OP_ZONE_RESET_ALL:
491
+ memset(sdkp->zones_wp_offset, 0,
492
+ sdkp->nr_zones * sizeof(unsigned int));
493
+ break;
494
+ default:
495
+ break;
496
+ }
497
+
498
+unlock_wp_offset:
499
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
500
+
501
+ return good_bytes;
262502 }
263503
264504 /**
....@@ -267,57 +507,48 @@
267507 * @good_bytes: Command reply bytes
268508 * @sshdr: command sense header
269509 *
270
- * Called from sd_done(). Process report zones reply and handle reset zone
271
- * and write commands errors.
510
+ * Called from sd_done() to handle zone commands errors and updates to the
511
+ * device queue zone write pointer offset cache.
272512 */
273
-void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
513
+unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
274514 struct scsi_sense_hdr *sshdr)
275515 {
276516 int result = cmd->result;
277517 struct request *rq = cmd->request;
278518
279
- switch (req_op(rq)) {
280
- case REQ_OP_ZONE_RESET:
519
+ if (op_is_zone_mgmt(req_op(rq)) &&
520
+ result &&
521
+ sshdr->sense_key == ILLEGAL_REQUEST &&
522
+ sshdr->asc == 0x24) {
523
+ /*
524
+ * INVALID FIELD IN CDB error: a zone management command was
525
+ * attempted on a conventional zone. Nothing to worry about,
526
+ * so be quiet about the error.
527
+ */
528
+ rq->rq_flags |= RQF_QUIET;
529
+ } else if (sd_zbc_need_zone_wp_update(rq))
530
+ good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
281531
282
- if (result &&
283
- sshdr->sense_key == ILLEGAL_REQUEST &&
284
- sshdr->asc == 0x24)
285
- /*
286
- * INVALID FIELD IN CDB error: reset of a conventional
287
- * zone was attempted. Nothing to worry about, so be
288
- * quiet about the error.
289
- */
290
- rq->rq_flags |= RQF_QUIET;
291
- break;
532
+ if (req_op(rq) == REQ_OP_ZONE_APPEND)
533
+ blk_req_zone_write_unlock(rq);
292534
293
- case REQ_OP_WRITE:
294
- case REQ_OP_WRITE_ZEROES:
295
- case REQ_OP_WRITE_SAME:
296
- break;
297
-
298
- case REQ_OP_ZONE_REPORT:
299
-
300
- if (!result)
301
- sd_zbc_report_zones_complete(cmd, good_bytes);
302
- break;
303
-
304
- }
535
+ return good_bytes;
305536 }
306537
307538 /**
308
- * sd_zbc_read_zoned_characteristics - Read zoned block device characteristics
539
+ * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
309540 * @sdkp: Target disk
310541 * @buf: Buffer where to store the VPD page data
311542 *
312
- * Read VPD page B6.
543
+ * Read VPD page B6, get information and check that reads are unconstrained.
313544 */
314
-static int sd_zbc_read_zoned_characteristics(struct scsi_disk *sdkp,
315
- unsigned char *buf)
545
+static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
546
+ unsigned char *buf)
316547 {
317548
318549 if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
319550 sd_printk(KERN_NOTICE, sdkp,
320
- "Unconstrained-read check failed\n");
551
+ "Read zoned characteristics VPD page failed\n");
321552 return -ENODEV;
322553 }
323554
....@@ -335,351 +566,6 @@
335566 sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
336567 }
337568
338
- return 0;
339
-}
340
-
341
-/**
342
- * sd_zbc_check_capacity - Check reported capacity.
343
- * @sdkp: Target disk
344
- * @buf: Buffer to use for commands
345
- *
346
- * ZBC drive may report only the capacity of the first conventional zones at
347
- * LBA 0. This is indicated by the RC_BASIS field of the read capacity reply.
348
- * Check this here. If the disk reported only its conventional zones capacity,
349
- * get the total capacity by doing a report zones.
350
- */
351
-static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
352
-{
353
- sector_t lba;
354
- int ret;
355
-
356
- if (sdkp->rc_basis != 0)
357
- return 0;
358
-
359
- /* Do a report zone to get the maximum LBA to check capacity */
360
- ret = sd_zbc_report_zones(sdkp, buf, SD_BUF_SIZE, 0);
361
- if (ret)
362
- return ret;
363
-
364
- /* The max_lba field is the capacity of this device */
365
- lba = get_unaligned_be64(&buf[8]);
366
- if (lba + 1 == sdkp->capacity)
367
- return 0;
368
-
369
- if (sdkp->first_scan)
370
- sd_printk(KERN_WARNING, sdkp,
371
- "Changing capacity from %llu to max LBA+1 %llu\n",
372
- (unsigned long long)sdkp->capacity,
373
- (unsigned long long)lba + 1);
374
- sdkp->capacity = lba + 1;
375
-
376
- return 0;
377
-}
378
-
379
-#define SD_ZBC_BUF_SIZE 131072U
380
-
381
-/**
382
- * sd_zbc_check_zone_size - Check the device zone sizes
383
- * @sdkp: Target disk
384
- *
385
- * Check that all zones of the device are equal. The last zone can however
386
- * be smaller. The zone size must also be a power of two number of LBAs.
387
- *
388
- * Returns the zone size in number of blocks upon success or an error code
389
- * upon failure.
390
- */
391
-static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
392
-{
393
- u64 zone_blocks = 0;
394
- sector_t block = 0;
395
- unsigned char *buf;
396
- unsigned char *rec;
397
- unsigned int buf_len;
398
- unsigned int list_length;
399
- s64 ret;
400
- u8 same;
401
-
402
- /* Get a buffer */
403
- buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
404
- if (!buf)
405
- return -ENOMEM;
406
-
407
- /* Do a report zone to get the same field */
408
- ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0);
409
- if (ret)
410
- goto out_free;
411
-
412
- same = buf[4] & 0x0f;
413
- if (same > 0) {
414
- rec = &buf[64];
415
- zone_blocks = get_unaligned_be64(&rec[8]);
416
- goto out;
417
- }
418
-
419
- /*
420
- * Check the size of all zones: all zones must be of
421
- * equal size, except the last zone which can be smaller
422
- * than other zones.
423
- */
424
- do {
425
-
426
- /* Parse REPORT ZONES header */
427
- list_length = get_unaligned_be32(&buf[0]) + 64;
428
- rec = buf + 64;
429
- buf_len = min(list_length, SD_ZBC_BUF_SIZE);
430
-
431
- /* Parse zone descriptors */
432
- while (rec < buf + buf_len) {
433
- u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
434
-
435
- if (zone_blocks == 0) {
436
- zone_blocks = this_zone_blocks;
437
- } else if (this_zone_blocks != zone_blocks &&
438
- (block + this_zone_blocks < sdkp->capacity
439
- || this_zone_blocks > zone_blocks)) {
440
- zone_blocks = 0;
441
- goto out;
442
- }
443
- block += this_zone_blocks;
444
- rec += 64;
445
- }
446
-
447
- if (block < sdkp->capacity) {
448
- ret = sd_zbc_report_zones(sdkp, buf,
449
- SD_ZBC_BUF_SIZE, block);
450
- if (ret)
451
- goto out_free;
452
- }
453
-
454
- } while (block < sdkp->capacity);
455
-
456
-out:
457
- if (!zone_blocks) {
458
- if (sdkp->first_scan)
459
- sd_printk(KERN_NOTICE, sdkp,
460
- "Devices with non constant zone "
461
- "size are not supported\n");
462
- ret = -ENODEV;
463
- } else if (!is_power_of_2(zone_blocks)) {
464
- if (sdkp->first_scan)
465
- sd_printk(KERN_NOTICE, sdkp,
466
- "Devices with non power of 2 zone "
467
- "size are not supported\n");
468
- ret = -ENODEV;
469
- } else if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
470
- if (sdkp->first_scan)
471
- sd_printk(KERN_NOTICE, sdkp,
472
- "Zone size too large\n");
473
- ret = -ENODEV;
474
- } else {
475
- ret = zone_blocks;
476
- }
477
-
478
-out_free:
479
- kfree(buf);
480
-
481
- return ret;
482
-}
483
-
484
-/**
485
- * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
486
- * @nr_zones: Number of zones to allocate space for.
487
- * @numa_node: NUMA node to allocate the memory from.
488
- */
489
-static inline unsigned long *
490
-sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
491
-{
492
- return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
493
- GFP_KERNEL, numa_node);
494
-}
495
-
496
-/**
497
- * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones
498
- * @sdkp: disk used
499
- * @buf: report reply buffer
500
- * @buflen: length of @buf
501
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
502
- * @seq_zones_bitmap: bitmap of sequential zones to set
503
- *
504
- * Parse reported zone descriptors in @buf to identify sequential zones and
505
- * set the reported zone bit in @seq_zones_bitmap accordingly.
506
- * Since read-only and offline zones cannot be written, do not
507
- * mark them as sequential in the bitmap.
508
- * Return the LBA after the last zone reported.
509
- */
510
-static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
511
- unsigned int buflen, u32 zone_shift,
512
- unsigned long *seq_zones_bitmap)
513
-{
514
- sector_t lba, next_lba = sdkp->capacity;
515
- unsigned int buf_len, list_length;
516
- unsigned char *rec;
517
- u8 type, cond;
518
-
519
- list_length = get_unaligned_be32(&buf[0]) + 64;
520
- buf_len = min(list_length, buflen);
521
- rec = buf + 64;
522
-
523
- while (rec < buf + buf_len) {
524
- type = rec[0] & 0x0f;
525
- cond = (rec[1] >> 4) & 0xf;
526
- lba = get_unaligned_be64(&rec[16]);
527
- if (type != ZBC_ZONE_TYPE_CONV &&
528
- cond != ZBC_ZONE_COND_READONLY &&
529
- cond != ZBC_ZONE_COND_OFFLINE)
530
- set_bit(lba >> zone_shift, seq_zones_bitmap);
531
- next_lba = lba + get_unaligned_be64(&rec[8]);
532
- rec += 64;
533
- }
534
-
535
- return next_lba;
536
-}
537
-
538
-/**
539
- * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
540
- * @sdkp: target disk
541
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
542
- * @nr_zones: number of zones to set up a seq zone bitmap for
543
- *
544
- * Allocate a zone bitmap and initialize it by identifying sequential zones.
545
- */
546
-static unsigned long *
547
-sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
548
- u32 nr_zones)
549
-{
550
- struct request_queue *q = sdkp->disk->queue;
551
- unsigned long *seq_zones_bitmap;
552
- sector_t lba = 0;
553
- unsigned char *buf;
554
- int ret = -ENOMEM;
555
-
556
- seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
557
- if (!seq_zones_bitmap)
558
- return ERR_PTR(-ENOMEM);
559
-
560
- buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
561
- if (!buf)
562
- goto out;
563
-
564
- while (lba < sdkp->capacity) {
565
- ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba);
566
- if (ret)
567
- goto out;
568
- lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
569
- zone_shift, seq_zones_bitmap);
570
- }
571
-
572
- if (lba != sdkp->capacity) {
573
- /* Something went wrong */
574
- ret = -EIO;
575
- }
576
-
577
-out:
578
- kfree(buf);
579
- if (ret) {
580
- kfree(seq_zones_bitmap);
581
- return ERR_PTR(ret);
582
- }
583
- return seq_zones_bitmap;
584
-}
585
-
586
-static void sd_zbc_cleanup(struct scsi_disk *sdkp)
587
-{
588
- struct request_queue *q = sdkp->disk->queue;
589
-
590
- kfree(q->seq_zones_bitmap);
591
- q->seq_zones_bitmap = NULL;
592
-
593
- kfree(q->seq_zones_wlock);
594
- q->seq_zones_wlock = NULL;
595
-
596
- q->nr_zones = 0;
597
-}
598
-
599
-static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
600
-{
601
- struct request_queue *q = sdkp->disk->queue;
602
- u32 zone_shift = ilog2(zone_blocks);
603
- u32 nr_zones;
604
- int ret;
605
-
606
- /* chunk_sectors indicates the zone size */
607
- blk_queue_chunk_sectors(q,
608
- logical_to_sectors(sdkp->device, zone_blocks));
609
- nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
610
-
611
- /*
612
- * Initialize the device request queue information if the number
613
- * of zones changed.
614
- */
615
- if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
616
- unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
617
- size_t zone_bitmap_size;
618
-
619
- if (nr_zones) {
620
- seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
621
- q->node);
622
- if (!seq_zones_wlock) {
623
- ret = -ENOMEM;
624
- goto err;
625
- }
626
-
627
- seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
628
- zone_shift, nr_zones);
629
- if (IS_ERR(seq_zones_bitmap)) {
630
- ret = PTR_ERR(seq_zones_bitmap);
631
- kfree(seq_zones_wlock);
632
- goto err;
633
- }
634
- }
635
- zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
636
- sizeof(unsigned long);
637
- blk_mq_freeze_queue(q);
638
- if (q->nr_zones != nr_zones) {
639
- /* READ16/WRITE16 is mandatory for ZBC disks */
640
- sdkp->device->use_16_for_rw = 1;
641
- sdkp->device->use_10_for_rw = 0;
642
-
643
- sdkp->zone_blocks = zone_blocks;
644
- sdkp->zone_shift = zone_shift;
645
- sdkp->nr_zones = nr_zones;
646
- q->nr_zones = nr_zones;
647
- swap(q->seq_zones_wlock, seq_zones_wlock);
648
- swap(q->seq_zones_bitmap, seq_zones_bitmap);
649
- } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
650
- zone_bitmap_size) != 0) {
651
- memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
652
- zone_bitmap_size);
653
- }
654
- blk_mq_unfreeze_queue(q);
655
- kfree(seq_zones_wlock);
656
- kfree(seq_zones_bitmap);
657
- }
658
-
659
- return 0;
660
-
661
-err:
662
- sd_zbc_cleanup(sdkp);
663
- return ret;
664
-}
665
-
666
-int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
667
-{
668
- int64_t zone_blocks;
669
- int ret;
670
-
671
- if (!sd_is_zoned(sdkp))
672
- /*
673
- * Device managed or normal SCSI disk,
674
- * no special handling required
675
- */
676
- return 0;
677
-
678
- /* Get zoned block device characteristics */
679
- ret = sd_zbc_read_zoned_characteristics(sdkp, buf);
680
- if (ret)
681
- goto err;
682
-
683569 /*
684570 * Check for unconstrained reads: host-managed devices with
685571 * constrained reads (drives failing read after write pointer)
....@@ -689,47 +575,66 @@
689575 if (sdkp->first_scan)
690576 sd_printk(KERN_NOTICE, sdkp,
691577 "constrained reads devices are not supported\n");
692
- ret = -ENODEV;
693
- goto err;
578
+ return -ENODEV;
694579 }
695580
696
- /* Check capacity */
697
- ret = sd_zbc_check_capacity(sdkp, buf);
698
- if (ret)
699
- goto err;
581
+ return 0;
582
+}
700583
701
- /*
702
- * Check zone size: only devices with a constant zone size (except
703
- * an eventual last runt zone) that is a power of 2 are supported.
704
- */
705
- zone_blocks = sd_zbc_check_zone_size(sdkp);
706
- ret = -EFBIG;
707
- if (zone_blocks != (u32)zone_blocks)
708
- goto err;
709
- ret = zone_blocks;
710
- if (ret < 0)
711
- goto err;
584
+/**
585
+ * sd_zbc_check_capacity - Check the device capacity
586
+ * @sdkp: Target disk
587
+ * @buf: command buffer
588
+ * @zblocks: zone size in number of blocks
589
+ *
590
+ * Get the device zone size and check that the device capacity as reported
591
+ * by READ CAPACITY matches the max_lba value (plus one) of the report zones
592
+ * command reply for devices with RC_BASIS == 0.
593
+ *
594
+ * Returns 0 upon success or an error code upon failure.
595
+ */
596
+static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
597
+ u32 *zblocks)
598
+{
599
+ u64 zone_blocks;
600
+ sector_t max_lba;
601
+ unsigned char *rec;
602
+ int ret;
712603
713
- /* The drive satisfies the kernel restrictions: set it up */
714
- ret = sd_zbc_setup(sdkp, zone_blocks);
604
+ /* Do a report zone to get max_lba and the size of the first zone */
605
+ ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false);
715606 if (ret)
716
- goto err;
607
+ return ret;
608
+
609
+ if (sdkp->rc_basis == 0) {
610
+ /* The max_lba field is the capacity of this device */
611
+ max_lba = get_unaligned_be64(&buf[8]);
612
+ if (sdkp->capacity != max_lba + 1) {
613
+ if (sdkp->first_scan)
614
+ sd_printk(KERN_WARNING, sdkp,
615
+ "Changing capacity from %llu to max LBA+1 %llu\n",
616
+ (unsigned long long)sdkp->capacity,
617
+ (unsigned long long)max_lba + 1);
618
+ sdkp->capacity = max_lba + 1;
619
+ }
620
+ }
621
+
622
+ /* Get the size of the first reported zone */
623
+ rec = buf + 64;
624
+ zone_blocks = get_unaligned_be64(&rec[8]);
625
+ if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
626
+ if (sdkp->first_scan)
627
+ sd_printk(KERN_NOTICE, sdkp,
628
+ "Zone size too large\n");
629
+ return -EFBIG;
630
+ }
631
+
632
+ *zblocks = zone_blocks;
717633
718634 return 0;
719
-
720
-err:
721
- sdkp->capacity = 0;
722
- sd_zbc_cleanup(sdkp);
723
-
724
- return ret;
725635 }
726636
727
/*
 * sd_zbc_remove - Release the zone information of a disk
 * @sdkp: Target disk
 *
 * Thin wrapper around sd_zbc_cleanup().
 */
void sd_zbc_remove(struct scsi_disk *sdkp)
{
	sd_zbc_cleanup(sdkp);
}
731
-
732
-void sd_zbc_print_zones(struct scsi_disk *sdkp)
637
+static void sd_zbc_print_zones(struct scsi_disk *sdkp)
733638 {
734639 if (!sd_is_zoned(sdkp) || !sdkp->capacity)
735640 return;
....@@ -745,3 +650,160 @@
745650 sdkp->nr_zones,
746651 sdkp->zone_blocks);
747652 }
653
+
654
+static int sd_zbc_init_disk(struct scsi_disk *sdkp)
655
+{
656
+ sdkp->zones_wp_offset = NULL;
657
+ spin_lock_init(&sdkp->zones_wp_offset_lock);
658
+ sdkp->rev_wp_offset = NULL;
659
+ mutex_init(&sdkp->rev_mutex);
660
+ INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
661
+ sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
662
+ if (!sdkp->zone_wp_update_buf)
663
+ return -ENOMEM;
664
+
665
+ return 0;
666
+}
667
+
668
+void sd_zbc_release_disk(struct scsi_disk *sdkp)
669
+{
670
+ kvfree(sdkp->zones_wp_offset);
671
+ sdkp->zones_wp_offset = NULL;
672
+ kfree(sdkp->zone_wp_update_buf);
673
+ sdkp->zone_wp_update_buf = NULL;
674
+}
675
+
676
+static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
677
+{
678
+ struct scsi_disk *sdkp = scsi_disk(disk);
679
+
680
+ swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
681
+}
682
+
683
+int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
684
+{
685
+ struct gendisk *disk = sdkp->disk;
686
+ struct request_queue *q = disk->queue;
687
+ u32 zone_blocks = sdkp->rev_zone_blocks;
688
+ unsigned int nr_zones = sdkp->rev_nr_zones;
689
+ u32 max_append;
690
+ int ret = 0;
691
+ unsigned int flags;
692
+
693
+ /*
694
+ * For all zoned disks, initialize zone append emulation data if not
695
+ * already done. This is necessary also for host-aware disks used as
696
+ * regular disks due to the presence of partitions as these partitions
697
+ * may be deleted and the disk zoned model changed back from
698
+ * BLK_ZONED_NONE to BLK_ZONED_HA.
699
+ */
700
+ if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
701
+ ret = sd_zbc_init_disk(sdkp);
702
+ if (ret)
703
+ return ret;
704
+ }
705
+
706
+ /*
707
+ * There is nothing to do for regular disks, including host-aware disks
708
+ * that have partitions.
709
+ */
710
+ if (!blk_queue_is_zoned(q))
711
+ return 0;
712
+
713
+ /*
714
+ * Make sure revalidate zones are serialized to ensure exclusive
715
+ * updates of the scsi disk data.
716
+ */
717
+ mutex_lock(&sdkp->rev_mutex);
718
+
719
+ if (sdkp->zone_blocks == zone_blocks &&
720
+ sdkp->nr_zones == nr_zones &&
721
+ disk->queue->nr_zones == nr_zones)
722
+ goto unlock;
723
+
724
+ flags = memalloc_noio_save();
725
+ sdkp->zone_blocks = zone_blocks;
726
+ sdkp->nr_zones = nr_zones;
727
+ sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
728
+ if (!sdkp->rev_wp_offset) {
729
+ ret = -ENOMEM;
730
+ memalloc_noio_restore(flags);
731
+ goto unlock;
732
+ }
733
+
734
+ ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
735
+
736
+ memalloc_noio_restore(flags);
737
+ kvfree(sdkp->rev_wp_offset);
738
+ sdkp->rev_wp_offset = NULL;
739
+
740
+ if (ret) {
741
+ sdkp->zone_blocks = 0;
742
+ sdkp->nr_zones = 0;
743
+ sdkp->capacity = 0;
744
+ goto unlock;
745
+ }
746
+
747
+ max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
748
+ q->limits.max_segments << (PAGE_SHIFT - 9));
749
+ max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
750
+
751
+ blk_queue_max_zone_append_sectors(q, max_append);
752
+
753
+ sd_zbc_print_zones(sdkp);
754
+
755
+unlock:
756
+ mutex_unlock(&sdkp->rev_mutex);
757
+
758
+ return ret;
759
+}
760
+
761
+int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
762
+{
763
+ struct gendisk *disk = sdkp->disk;
764
+ struct request_queue *q = disk->queue;
765
+ unsigned int nr_zones;
766
+ u32 zone_blocks = 0;
767
+ int ret;
768
+
769
+ if (!sd_is_zoned(sdkp))
770
+ /*
771
+ * Device managed or normal SCSI disk,
772
+ * no special handling required
773
+ */
774
+ return 0;
775
+
776
+ /* Check zoned block device characteristics (unconstrained reads) */
777
+ ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
778
+ if (ret)
779
+ goto err;
780
+
781
+ /* Check the device capacity reported by report zones */
782
+ ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks);
783
+ if (ret != 0)
784
+ goto err;
785
+
786
+ /* The drive satisfies the kernel restrictions: set it up */
787
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
788
+ blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
789
+ if (sdkp->zones_max_open == U32_MAX)
790
+ blk_queue_max_open_zones(q, 0);
791
+ else
792
+ blk_queue_max_open_zones(q, sdkp->zones_max_open);
793
+ blk_queue_max_active_zones(q, 0);
794
+ nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
795
+
796
+ /* READ16/WRITE16 is mandatory for ZBC disks */
797
+ sdkp->device->use_16_for_rw = 1;
798
+ sdkp->device->use_10_for_rw = 0;
799
+
800
+ sdkp->rev_nr_zones = nr_zones;
801
+ sdkp->rev_zone_blocks = zone_blocks;
802
+
803
+ return 0;
804
+
805
+err:
806
+ sdkp->capacity = 0;
807
+
808
+ return ret;
809
+}