| .. | .. |
|---|
| 1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
|---|
| 2 | 2 | #include <linux/vmalloc.h> |
|---|
| 3 | | -#include <linux/sizes.h> |
|---|
| 3 | +#include <linux/bitmap.h> |
|---|
| 4 | 4 | #include "null_blk.h" |
|---|
| 5 | + |
|---|
| 6 | +#define CREATE_TRACE_POINTS |
|---|
| 7 | +#include "null_blk_trace.h" |
|---|
| 5 | 8 | |
|---|
| 6 | 9 | #define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) |
|---|
| 7 | 10 | |
|---|
| .. | .. |
|---|
| 10 | 13 | return sect >> ilog2(dev->zone_size_sects); |
|---|
| 11 | 14 | } |
|---|
| 12 | 15 | |
|---|
| 13 | | -int null_zone_init(struct nullb_device *dev) |
|---|
| 16 | +int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) |
|---|
| 14 | 17 | { |
|---|
| 15 | | - sector_t dev_capacity_sects; |
|---|
| 18 | + sector_t dev_capacity_sects, zone_capacity_sects; |
|---|
| 16 | 19 | sector_t sector = 0; |
|---|
| 17 | 20 | unsigned int i; |
|---|
| 18 | 21 | |
|---|
| 19 | 22 | if (!is_power_of_2(dev->zone_size)) { |
|---|
| 20 | | - pr_err("null_blk: zone_size must be power-of-two\n"); |
|---|
| 23 | + pr_err("zone_size must be power-of-two\n"); |
|---|
| 21 | 24 | return -EINVAL; |
|---|
| 22 | 25 | } |
|---|
| 23 | 26 | if (dev->zone_size > dev->size) { |
|---|
| .. | .. |
|---|
| 25 | 28 | return -EINVAL; |
|---|
| 26 | 29 | } |
|---|
| 27 | 30 | |
|---|
| 31 | + if (!dev->zone_capacity) |
|---|
| 32 | + dev->zone_capacity = dev->zone_size; |
|---|
| 33 | + |
|---|
| 34 | + if (dev->zone_capacity > dev->zone_size) { |
|---|
| 35 | + pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
|---|
| 36 | + dev->zone_capacity, dev->zone_size); |
|---|
| 37 | + return -EINVAL; |
|---|
| 38 | + } |
|---|
| 39 | + |
|---|
| 40 | + zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); |
|---|
| 28 | 41 | dev_capacity_sects = MB_TO_SECTS(dev->size); |
|---|
| 29 | 42 | dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); |
|---|
| 30 | 43 | dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); |
|---|
| .. | .. |
|---|
| 36 | 49 | if (!dev->zones) |
|---|
| 37 | 50 | return -ENOMEM; |
|---|
| 38 | 51 | |
|---|
| 39 | | - for (i = 0; i < dev->nr_zones; i++) { |
|---|
| 52 | + /* |
|---|
| 53 | + * With memory backing, the zone_lock spinlock needs to be temporarily |
|---|
| 54 | + * released to avoid scheduling in atomic context. To guarantee zone |
|---|
| 55 | + * information protection, use a bitmap to lock zones with |
|---|
| 56 | + * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing |
|---|
| 57 | + * implies that the queue is marked with BLK_MQ_F_BLOCKING. |
|---|
| 58 | + */ |
|---|
| 59 | + spin_lock_init(&dev->zone_lock); |
|---|
| 60 | + if (dev->memory_backed) { |
|---|
| 61 | + dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); |
|---|
| 62 | + if (!dev->zone_locks) { |
|---|
| 63 | + kvfree(dev->zones); |
|---|
| 64 | + return -ENOMEM; |
|---|
| 65 | + } |
|---|
| 66 | + } |
|---|
| 67 | + |
|---|
| 68 | + if (dev->zone_nr_conv >= dev->nr_zones) { |
|---|
| 69 | + dev->zone_nr_conv = dev->nr_zones - 1; |
|---|
| 70 | + pr_info("changed the number of conventional zones to %u\n",
|---|
| 71 | + dev->zone_nr_conv); |
|---|
| 72 | + } |
|---|
| 73 | + |
|---|
| 74 | + /* Max active zones has to be < the number of seq zones in order to be enforceable */
|---|
| 75 | + if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) { |
|---|
| 76 | + dev->zone_max_active = 0; |
|---|
| 77 | + pr_info("zone_max_active limit disabled, limit >= zone count\n"); |
|---|
| 78 | + } |
|---|
| 79 | + |
|---|
| 80 | + /* Max open zones has to be <= max active zones */ |
|---|
| 81 | + if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) { |
|---|
| 82 | + dev->zone_max_open = dev->zone_max_active; |
|---|
| 83 | + pr_info("changed the maximum number of open zones to %u\n", |
|---|
| 84 | + dev->zone_max_open);
|---|
| 85 | + } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) { |
|---|
| 86 | + dev->zone_max_open = 0; |
|---|
| 87 | + pr_info("zone_max_open limit disabled, limit >= zone count\n"); |
|---|
| 88 | + } |
|---|
| 89 | + |
|---|
| 90 | + for (i = 0; i < dev->zone_nr_conv; i++) { |
|---|
| 91 | + struct blk_zone *zone = &dev->zones[i]; |
|---|
| 92 | + |
|---|
| 93 | + zone->start = sector; |
|---|
| 94 | + zone->len = dev->zone_size_sects; |
|---|
| 95 | + zone->capacity = zone->len; |
|---|
| 96 | + zone->wp = zone->start + zone->len; |
|---|
| 97 | + zone->type = BLK_ZONE_TYPE_CONVENTIONAL; |
|---|
| 98 | + zone->cond = BLK_ZONE_COND_NOT_WP; |
|---|
| 99 | + |
|---|
| 100 | + sector += dev->zone_size_sects; |
|---|
| 101 | + } |
|---|
| 102 | + |
|---|
| 103 | + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { |
|---|
| 40 | 104 | struct blk_zone *zone = &dev->zones[i]; |
|---|
| 41 | 105 | |
|---|
| 42 | 106 | zone->start = zone->wp = sector; |
|---|
| .. | .. |
|---|
| 44 | 108 | zone->len = dev_capacity_sects - zone->start; |
|---|
| 45 | 109 | else |
|---|
| 46 | 110 | zone->len = dev->zone_size_sects; |
|---|
| 111 | + zone->capacity = |
|---|
| 112 | + min_t(sector_t, zone->len, zone_capacity_sects); |
|---|
| 47 | 113 | zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; |
|---|
| 48 | 114 | zone->cond = BLK_ZONE_COND_EMPTY; |
|---|
| 49 | 115 | |
|---|
| 50 | 116 | sector += dev->zone_size_sects; |
|---|
| 51 | 117 | } |
|---|
| 52 | 118 | |
|---|
| 119 | + q->limits.zoned = BLK_ZONED_HM; |
|---|
| 120 | + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); |
|---|
| 121 | + blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); |
|---|
| 122 | + |
|---|
| 53 | 123 | return 0; |
|---|
| 54 | 124 | } |
|---|
| 55 | 125 | |
|---|
| 56 | | -void null_zone_exit(struct nullb_device *dev) |
|---|
| 126 | +int null_register_zoned_dev(struct nullb *nullb) |
|---|
| 57 | 127 | { |
|---|
| 128 | + struct nullb_device *dev = nullb->dev; |
|---|
| 129 | + struct request_queue *q = nullb->q; |
|---|
| 130 | + |
|---|
| 131 | + if (queue_is_mq(q)) { |
|---|
| 132 | + int ret = blk_revalidate_disk_zones(nullb->disk, NULL); |
|---|
| 133 | + |
|---|
| 134 | + if (ret) |
|---|
| 135 | + return ret; |
|---|
| 136 | + } else { |
|---|
| 137 | + blk_queue_chunk_sectors(q, dev->zone_size_sects); |
|---|
| 138 | + q->nr_zones = blkdev_nr_zones(nullb->disk); |
|---|
| 139 | + } |
|---|
| 140 | + |
|---|
| 141 | + blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); |
|---|
| 142 | + blk_queue_max_open_zones(q, dev->zone_max_open); |
|---|
| 143 | + blk_queue_max_active_zones(q, dev->zone_max_active); |
|---|
| 144 | + |
|---|
| 145 | + return 0; |
|---|
| 146 | +} |
|---|
| 147 | + |
|---|
| 148 | +void null_free_zoned_dev(struct nullb_device *dev) |
|---|
| 149 | +{ |
|---|
| 150 | + bitmap_free(dev->zone_locks); |
|---|
| 58 | 151 | kvfree(dev->zones); |
|---|
| 59 | 152 | dev->zones = NULL; |
|---|
| 60 | 153 | } |
|---|
| 61 | 154 | |
|---|
| 62 | | -static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio, |
|---|
| 63 | | - unsigned int zno, unsigned int nr_zones) |
|---|
| 155 | +static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) |
|---|
| 64 | 156 | { |
|---|
| 65 | | - struct blk_zone_report_hdr *hdr = NULL; |
|---|
| 66 | | - struct bio_vec bvec; |
|---|
| 67 | | - struct bvec_iter iter; |
|---|
| 68 | | - void *addr; |
|---|
| 69 | | - unsigned int zones_to_cpy; |
|---|
| 70 | | - |
|---|
| 71 | | - bio_for_each_segment(bvec, bio, iter) { |
|---|
| 72 | | - addr = kmap_atomic(bvec.bv_page); |
|---|
| 73 | | - |
|---|
| 74 | | - zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone); |
|---|
| 75 | | - |
|---|
| 76 | | - if (!hdr) { |
|---|
| 77 | | - hdr = (struct blk_zone_report_hdr *)addr; |
|---|
| 78 | | - hdr->nr_zones = nr_zones; |
|---|
| 79 | | - zones_to_cpy--; |
|---|
| 80 | | - addr += sizeof(struct blk_zone_report_hdr); |
|---|
| 81 | | - } |
|---|
| 82 | | - |
|---|
| 83 | | - zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones); |
|---|
| 84 | | - |
|---|
| 85 | | - memcpy(addr, &dev->zones[zno], |
|---|
| 86 | | - zones_to_cpy * sizeof(struct blk_zone)); |
|---|
| 87 | | - |
|---|
| 88 | | - kunmap_atomic(addr); |
|---|
| 89 | | - |
|---|
| 90 | | - nr_zones -= zones_to_cpy; |
|---|
| 91 | | - zno += zones_to_cpy; |
|---|
| 92 | | - |
|---|
| 93 | | - if (!nr_zones) |
|---|
| 94 | | - break; |
|---|
| 95 | | - } |
|---|
| 157 | + if (dev->memory_backed) |
|---|
| 158 | + wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); |
|---|
| 159 | + spin_lock_irq(&dev->zone_lock); |
|---|
| 96 | 160 | } |
|---|
| 97 | 161 | |
|---|
| 98 | | -blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio) |
|---|
| 162 | +static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) |
|---|
| 163 | +{ |
|---|
| 164 | + spin_unlock_irq(&dev->zone_lock); |
|---|
| 165 | + |
|---|
| 166 | + if (dev->memory_backed) |
|---|
| 167 | + clear_and_wake_up_bit(zno, dev->zone_locks); |
|---|
| 168 | +} |
|---|
| 169 | + |
|---|
| 170 | +int null_report_zones(struct gendisk *disk, sector_t sector, |
|---|
| 171 | + unsigned int nr_zones, report_zones_cb cb, void *data) |
|---|
| 172 | +{ |
|---|
| 173 | + struct nullb *nullb = disk->private_data; |
|---|
| 174 | + struct nullb_device *dev = nullb->dev; |
|---|
| 175 | + unsigned int first_zone, i, zno; |
|---|
| 176 | + struct blk_zone zone; |
|---|
| 177 | + int error; |
|---|
| 178 | + |
|---|
| 179 | + first_zone = null_zone_no(dev, sector); |
|---|
| 180 | + if (first_zone >= dev->nr_zones) |
|---|
| 181 | + return 0; |
|---|
| 182 | + |
|---|
| 183 | + nr_zones = min(nr_zones, dev->nr_zones - first_zone); |
|---|
| 184 | + trace_nullb_report_zones(nullb, nr_zones); |
|---|
| 185 | + |
|---|
| 186 | + zno = first_zone; |
|---|
| 187 | + for (i = 0; i < nr_zones; i++, zno++) { |
|---|
| 188 | + /* |
|---|
| 189 | + * Stacked DM target drivers will remap the zone information by |
|---|
| 190 | + * modifying the zone information passed to the report callback. |
|---|
| 191 | + * So use a local copy to avoid corruption of the device zone |
|---|
| 192 | + * array. |
|---|
| 193 | + */ |
|---|
| 194 | + null_lock_zone(dev, zno); |
|---|
| 195 | + memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone)); |
|---|
| 196 | + null_unlock_zone(dev, zno); |
|---|
| 197 | + |
|---|
| 198 | + error = cb(&zone, i, data); |
|---|
| 199 | + if (error) |
|---|
| 200 | + return error; |
|---|
| 201 | + } |
|---|
| 202 | + |
|---|
| 203 | + return nr_zones; |
|---|
| 204 | +} |
|---|
| 205 | + |
|---|
| 206 | +/* |
|---|
| 207 | + * This is called in the case of memory backing from null_process_cmd() |
|---|
| 208 | + * with the target zone already locked. |
|---|
| 209 | + */ |
|---|
| 210 | +size_t null_zone_valid_read_len(struct nullb *nullb, |
|---|
| 211 | + sector_t sector, unsigned int len) |
|---|
| 99 | 212 | { |
|---|
| 100 | 213 | struct nullb_device *dev = nullb->dev; |
|---|
| 101 | | - unsigned int zno = null_zone_no(dev, bio->bi_iter.bi_sector); |
|---|
| 102 | | - unsigned int nr_zones = dev->nr_zones - zno; |
|---|
| 103 | | - unsigned int max_zones; |
|---|
| 214 | + struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; |
|---|
| 215 | + unsigned int nr_sectors = len >> SECTOR_SHIFT; |
|---|
| 104 | 216 | |
|---|
| 105 | | - max_zones = (bio->bi_iter.bi_size / sizeof(struct blk_zone)) - 1; |
|---|
| 106 | | - nr_zones = min_t(unsigned int, nr_zones, max_zones); |
|---|
| 107 | | - null_zone_fill_bio(nullb->dev, bio, zno, nr_zones); |
|---|
| 217 | + /* Read must be below the write pointer position */ |
|---|
| 218 | + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL || |
|---|
| 219 | + sector + nr_sectors <= zone->wp) |
|---|
| 220 | + return len; |
|---|
| 221 | + |
|---|
| 222 | + if (sector > zone->wp) |
|---|
| 223 | + return 0; |
|---|
| 224 | + |
|---|
| 225 | + return (zone->wp - sector) << SECTOR_SHIFT; |
|---|
| 226 | +} |
|---|
| 227 | + |
|---|
| 228 | +static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone) |
|---|
| 229 | +{ |
|---|
| 230 | + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) |
|---|
| 231 | + return BLK_STS_IOERR; |
|---|
| 232 | + |
|---|
| 233 | + switch (zone->cond) { |
|---|
| 234 | + case BLK_ZONE_COND_CLOSED: |
|---|
| 235 | + /* close operation on closed is not an error */ |
|---|
| 236 | + return BLK_STS_OK; |
|---|
| 237 | + case BLK_ZONE_COND_IMP_OPEN: |
|---|
| 238 | + dev->nr_zones_imp_open--; |
|---|
| 239 | + break; |
|---|
| 240 | + case BLK_ZONE_COND_EXP_OPEN: |
|---|
| 241 | + dev->nr_zones_exp_open--; |
|---|
| 242 | + break; |
|---|
| 243 | + case BLK_ZONE_COND_EMPTY: |
|---|
| 244 | + case BLK_ZONE_COND_FULL: |
|---|
| 245 | + default: |
|---|
| 246 | + return BLK_STS_IOERR; |
|---|
| 247 | + } |
|---|
| 248 | + |
|---|
| 249 | + if (zone->wp == zone->start) { |
|---|
| 250 | + zone->cond = BLK_ZONE_COND_EMPTY; |
|---|
| 251 | + } else { |
|---|
| 252 | + zone->cond = BLK_ZONE_COND_CLOSED; |
|---|
| 253 | + dev->nr_zones_closed++; |
|---|
| 254 | + } |
|---|
| 108 | 255 | |
|---|
| 109 | 256 | return BLK_STS_OK; |
|---|
| 110 | 257 | } |
|---|
| 111 | 258 | |
|---|
| 112 | | -void null_zone_write(struct nullb_cmd *cmd, sector_t sector, |
|---|
| 113 | | - unsigned int nr_sectors) |
|---|
| 259 | +static void null_close_first_imp_zone(struct nullb_device *dev) |
|---|
| 260 | +{ |
|---|
| 261 | + unsigned int i; |
|---|
| 262 | + |
|---|
| 263 | + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { |
|---|
| 264 | + if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) { |
|---|
| 265 | + null_close_zone(dev, &dev->zones[i]); |
|---|
| 266 | + return; |
|---|
| 267 | + } |
|---|
| 268 | + } |
|---|
| 269 | +} |
|---|
| 270 | + |
|---|
| 271 | +static blk_status_t null_check_active(struct nullb_device *dev) |
|---|
| 272 | +{ |
|---|
| 273 | + if (!dev->zone_max_active) |
|---|
| 274 | + return BLK_STS_OK; |
|---|
| 275 | + |
|---|
| 276 | + if (dev->nr_zones_exp_open + dev->nr_zones_imp_open + |
|---|
| 277 | + dev->nr_zones_closed < dev->zone_max_active) |
|---|
| 278 | + return BLK_STS_OK; |
|---|
| 279 | + |
|---|
| 280 | + return BLK_STS_ZONE_ACTIVE_RESOURCE; |
|---|
| 281 | +} |
|---|
| 282 | + |
|---|
| 283 | +static blk_status_t null_check_open(struct nullb_device *dev) |
|---|
| 284 | +{ |
|---|
| 285 | + if (!dev->zone_max_open) |
|---|
| 286 | + return BLK_STS_OK; |
|---|
| 287 | + |
|---|
| 288 | + if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open) |
|---|
| 289 | + return BLK_STS_OK; |
|---|
| 290 | + |
|---|
| 291 | + if (dev->nr_zones_imp_open) { |
|---|
| 292 | + if (null_check_active(dev) == BLK_STS_OK) { |
|---|
| 293 | + null_close_first_imp_zone(dev); |
|---|
| 294 | + return BLK_STS_OK; |
|---|
| 295 | + } |
|---|
| 296 | + } |
|---|
| 297 | + |
|---|
| 298 | + return BLK_STS_ZONE_OPEN_RESOURCE; |
|---|
| 299 | +} |
|---|
| 300 | + |
|---|
| 301 | +/* |
|---|
| 302 | + * This function matches the manage open zone resources function in the ZBC standard, |
|---|
| 303 | + * with the addition of max active zones support (added in the ZNS standard). |
|---|
| 304 | + * |
|---|
| 305 | + * The function determines if a zone can transition to implicit open or explicit open, |
|---|
| 306 | + * while maintaining the max open zone (and max active zone) limit(s). It may close an |
|---|
| 307 | + * implicit open zone in order to make additional zone resources available. |
|---|
| 308 | + * |
|---|
| 309 | + * ZBC states that an implicit open zone shall be closed only if there is not |
|---|
| 310 | + * room within the open limit. However, with the addition of an active limit, |
|---|
| 311 | + * it is not certain that closing an implicit open zone will allow a new zone |
|---|
| 312 | + * to be opened, since we might already be at the active limit capacity. |
|---|
| 313 | + */ |
|---|
| 314 | +static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone) |
|---|
| 315 | +{ |
|---|
| 316 | + blk_status_t ret; |
|---|
| 317 | + |
|---|
| 318 | + switch (zone->cond) { |
|---|
| 319 | + case BLK_ZONE_COND_EMPTY: |
|---|
| 320 | + ret = null_check_active(dev); |
|---|
| 321 | + if (ret != BLK_STS_OK) |
|---|
| 322 | + return ret; |
|---|
| 323 | + fallthrough; |
|---|
| 324 | + case BLK_ZONE_COND_CLOSED: |
|---|
| 325 | + return null_check_open(dev); |
|---|
| 326 | + default: |
|---|
| 327 | + /* Should never be called for other states */ |
|---|
| 328 | + WARN_ON(1); |
|---|
| 329 | + return BLK_STS_IOERR; |
|---|
| 330 | + } |
|---|
| 331 | +} |
|---|
| 332 | + |
|---|
| 333 | +static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, |
|---|
| 334 | + unsigned int nr_sectors, bool append) |
|---|
| 114 | 335 | { |
|---|
| 115 | 336 | struct nullb_device *dev = cmd->nq->dev; |
|---|
| 116 | 337 | unsigned int zno = null_zone_no(dev, sector); |
|---|
| 117 | 338 | struct blk_zone *zone = &dev->zones[zno]; |
|---|
| 339 | + blk_status_t ret; |
|---|
| 340 | + |
|---|
| 341 | + trace_nullb_zone_op(cmd, zno, zone->cond); |
|---|
| 342 | + |
|---|
| 343 | + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { |
|---|
| 344 | + if (append) |
|---|
| 345 | + return BLK_STS_IOERR; |
|---|
| 346 | + return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); |
|---|
| 347 | + } |
|---|
| 348 | + |
|---|
| 349 | + null_lock_zone(dev, zno); |
|---|
| 118 | 350 | |
|---|
| 119 | 351 | switch (zone->cond) { |
|---|
| 120 | 352 | case BLK_ZONE_COND_FULL: |
|---|
| 121 | 353 | /* Cannot write to a full zone */ |
|---|
| 122 | | - cmd->error = BLK_STS_IOERR; |
|---|
| 123 | | - break; |
|---|
| 354 | + ret = BLK_STS_IOERR; |
|---|
| 355 | + goto unlock; |
|---|
| 124 | 356 | case BLK_ZONE_COND_EMPTY: |
|---|
| 357 | + case BLK_ZONE_COND_CLOSED: |
|---|
| 358 | + ret = null_check_zone_resources(dev, zone); |
|---|
| 359 | + if (ret != BLK_STS_OK) |
|---|
| 360 | + goto unlock; |
|---|
| 361 | + break; |
|---|
| 125 | 362 | case BLK_ZONE_COND_IMP_OPEN: |
|---|
| 126 | | - /* Writes must be at the write pointer position */ |
|---|
| 127 | | - if (sector != zone->wp) { |
|---|
| 128 | | - cmd->error = BLK_STS_IOERR; |
|---|
| 129 | | - break; |
|---|
| 130 | | - } |
|---|
| 131 | | - |
|---|
| 132 | | - if (zone->cond == BLK_ZONE_COND_EMPTY) |
|---|
| 133 | | - zone->cond = BLK_ZONE_COND_IMP_OPEN; |
|---|
| 134 | | - |
|---|
| 135 | | - zone->wp += nr_sectors; |
|---|
| 136 | | - if (zone->wp == zone->start + zone->len) |
|---|
| 137 | | - zone->cond = BLK_ZONE_COND_FULL; |
|---|
| 363 | + case BLK_ZONE_COND_EXP_OPEN: |
|---|
| 138 | 364 | break; |
|---|
| 139 | 365 | default: |
|---|
| 140 | 366 | /* Invalid zone condition */ |
|---|
| 141 | | - cmd->error = BLK_STS_IOERR; |
|---|
| 142 | | - break; |
|---|
| 367 | + ret = BLK_STS_IOERR; |
|---|
| 368 | + goto unlock; |
|---|
| 143 | 369 | } |
|---|
| 370 | + |
|---|
| 371 | + /* |
|---|
| 372 | + * Regular writes must be at the write pointer position. |
|---|
| 373 | + * Zone append writes are automatically issued at the write |
|---|
| 374 | + * pointer and the position returned using the request or BIO |
|---|
| 375 | + * sector. |
|---|
| 376 | + */ |
|---|
| 377 | + if (append) { |
|---|
| 378 | + sector = zone->wp; |
|---|
| 379 | + if (cmd->bio) |
|---|
| 380 | + cmd->bio->bi_iter.bi_sector = sector; |
|---|
| 381 | + else |
|---|
| 382 | + cmd->rq->__sector = sector; |
|---|
| 383 | + } else if (sector != zone->wp) { |
|---|
| 384 | + ret = BLK_STS_IOERR; |
|---|
| 385 | + goto unlock; |
|---|
| 386 | + } |
|---|
| 387 | + |
|---|
| 388 | + if (zone->wp + nr_sectors > zone->start + zone->capacity) { |
|---|
| 389 | + ret = BLK_STS_IOERR; |
|---|
| 390 | + goto unlock; |
|---|
| 391 | + } |
|---|
| 392 | + |
|---|
| 393 | + if (zone->cond == BLK_ZONE_COND_CLOSED) { |
|---|
| 394 | + dev->nr_zones_closed--; |
|---|
| 395 | + dev->nr_zones_imp_open++; |
|---|
| 396 | + } else if (zone->cond == BLK_ZONE_COND_EMPTY) { |
|---|
| 397 | + dev->nr_zones_imp_open++; |
|---|
| 398 | + } |
|---|
| 399 | + if (zone->cond != BLK_ZONE_COND_EXP_OPEN) |
|---|
| 400 | + zone->cond = BLK_ZONE_COND_IMP_OPEN; |
|---|
| 401 | + |
|---|
| 402 | + /* |
|---|
| 403 | + * Memory backing allocation may sleep: release the zone_lock spinlock |
|---|
| 404 | + * to avoid scheduling in atomic context. Zone operation atomicity is |
|---|
| 405 | + * still guaranteed through the zone_locks bitmap. |
|---|
| 406 | + */ |
|---|
| 407 | + if (dev->memory_backed) |
|---|
| 408 | + spin_unlock_irq(&dev->zone_lock); |
|---|
| 409 | + ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); |
|---|
| 410 | + if (dev->memory_backed) |
|---|
| 411 | + spin_lock_irq(&dev->zone_lock); |
|---|
| 412 | + |
|---|
| 413 | + if (ret != BLK_STS_OK) |
|---|
| 414 | + goto unlock; |
|---|
| 415 | + |
|---|
| 416 | + zone->wp += nr_sectors; |
|---|
| 417 | + if (zone->wp == zone->start + zone->capacity) { |
|---|
| 418 | + if (zone->cond == BLK_ZONE_COND_EXP_OPEN) |
|---|
| 419 | + dev->nr_zones_exp_open--; |
|---|
| 420 | + else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) |
|---|
| 421 | + dev->nr_zones_imp_open--; |
|---|
| 422 | + zone->cond = BLK_ZONE_COND_FULL; |
|---|
| 423 | + } |
|---|
| 424 | + ret = BLK_STS_OK; |
|---|
| 425 | + |
|---|
| 426 | +unlock: |
|---|
| 427 | + null_unlock_zone(dev, zno); |
|---|
| 428 | + |
|---|
| 429 | + return ret; |
|---|
| 144 | 430 | } |
|---|
| 145 | 431 | |
|---|
| 146 | | -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) |
|---|
| 432 | +static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) |
|---|
| 147 | 433 | { |
|---|
| 148 | | - struct nullb_device *dev = cmd->nq->dev; |
|---|
| 149 | | - unsigned int zno = null_zone_no(dev, sector); |
|---|
| 150 | | - struct blk_zone *zone = &dev->zones[zno]; |
|---|
| 434 | + blk_status_t ret; |
|---|
| 435 | + |
|---|
| 436 | + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) |
|---|
| 437 | + return BLK_STS_IOERR; |
|---|
| 438 | + |
|---|
| 439 | + switch (zone->cond) { |
|---|
| 440 | + case BLK_ZONE_COND_EXP_OPEN: |
|---|
| 441 | + /* open operation on exp open is not an error */ |
|---|
| 442 | + return BLK_STS_OK; |
|---|
| 443 | + case BLK_ZONE_COND_EMPTY: |
|---|
| 444 | + ret = null_check_zone_resources(dev, zone); |
|---|
| 445 | + if (ret != BLK_STS_OK) |
|---|
| 446 | + return ret; |
|---|
| 447 | + break; |
|---|
| 448 | + case BLK_ZONE_COND_IMP_OPEN: |
|---|
| 449 | + dev->nr_zones_imp_open--; |
|---|
| 450 | + break; |
|---|
| 451 | + case BLK_ZONE_COND_CLOSED: |
|---|
| 452 | + ret = null_check_zone_resources(dev, zone); |
|---|
| 453 | + if (ret != BLK_STS_OK) |
|---|
| 454 | + return ret; |
|---|
| 455 | + dev->nr_zones_closed--; |
|---|
| 456 | + break; |
|---|
| 457 | + case BLK_ZONE_COND_FULL: |
|---|
| 458 | + default: |
|---|
| 459 | + return BLK_STS_IOERR; |
|---|
| 460 | + } |
|---|
| 461 | + |
|---|
| 462 | + zone->cond = BLK_ZONE_COND_EXP_OPEN; |
|---|
| 463 | + dev->nr_zones_exp_open++; |
|---|
| 464 | + |
|---|
| 465 | + return BLK_STS_OK; |
|---|
| 466 | +} |
|---|
| 467 | + |
|---|
| 468 | +static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) |
|---|
| 469 | +{ |
|---|
| 470 | + blk_status_t ret; |
|---|
| 471 | + |
|---|
| 472 | + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) |
|---|
| 473 | + return BLK_STS_IOERR; |
|---|
| 474 | + |
|---|
| 475 | + switch (zone->cond) { |
|---|
| 476 | + case BLK_ZONE_COND_FULL: |
|---|
| 477 | + /* finish operation on full is not an error */ |
|---|
| 478 | + return BLK_STS_OK; |
|---|
| 479 | + case BLK_ZONE_COND_EMPTY: |
|---|
| 480 | + ret = null_check_zone_resources(dev, zone); |
|---|
| 481 | + if (ret != BLK_STS_OK) |
|---|
| 482 | + return ret; |
|---|
| 483 | + break; |
|---|
| 484 | + case BLK_ZONE_COND_IMP_OPEN: |
|---|
| 485 | + dev->nr_zones_imp_open--; |
|---|
| 486 | + break; |
|---|
| 487 | + case BLK_ZONE_COND_EXP_OPEN: |
|---|
| 488 | + dev->nr_zones_exp_open--; |
|---|
| 489 | + break; |
|---|
| 490 | + case BLK_ZONE_COND_CLOSED: |
|---|
| 491 | + ret = null_check_zone_resources(dev, zone); |
|---|
| 492 | + if (ret != BLK_STS_OK) |
|---|
| 493 | + return ret; |
|---|
| 494 | + dev->nr_zones_closed--; |
|---|
| 495 | + break; |
|---|
| 496 | + default: |
|---|
| 497 | + return BLK_STS_IOERR; |
|---|
| 498 | + } |
|---|
| 499 | + |
|---|
| 500 | + zone->cond = BLK_ZONE_COND_FULL; |
|---|
| 501 | + zone->wp = zone->start + zone->len; |
|---|
| 502 | + |
|---|
| 503 | + return BLK_STS_OK; |
|---|
| 504 | +} |
|---|
| 505 | + |
|---|
| 506 | +static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone) |
|---|
| 507 | +{ |
|---|
| 508 | + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) |
|---|
| 509 | + return BLK_STS_IOERR; |
|---|
| 510 | + |
|---|
| 511 | + switch (zone->cond) { |
|---|
| 512 | + case BLK_ZONE_COND_EMPTY: |
|---|
| 513 | + /* reset operation on empty is not an error */ |
|---|
| 514 | + return BLK_STS_OK; |
|---|
| 515 | + case BLK_ZONE_COND_IMP_OPEN: |
|---|
| 516 | + dev->nr_zones_imp_open--; |
|---|
| 517 | + break; |
|---|
| 518 | + case BLK_ZONE_COND_EXP_OPEN: |
|---|
| 519 | + dev->nr_zones_exp_open--; |
|---|
| 520 | + break; |
|---|
| 521 | + case BLK_ZONE_COND_CLOSED: |
|---|
| 522 | + dev->nr_zones_closed--; |
|---|
| 523 | + break; |
|---|
| 524 | + case BLK_ZONE_COND_FULL: |
|---|
| 525 | + break; |
|---|
| 526 | + default: |
|---|
| 527 | + return BLK_STS_IOERR; |
|---|
| 528 | + } |
|---|
| 151 | 529 | |
|---|
| 152 | 530 | zone->cond = BLK_ZONE_COND_EMPTY; |
|---|
| 153 | 531 | zone->wp = zone->start; |
|---|
| 532 | + |
|---|
| 533 | + return BLK_STS_OK; |
|---|
| 534 | +} |
|---|
| 535 | + |
|---|
| 536 | +static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, |
|---|
| 537 | + sector_t sector) |
|---|
| 538 | +{ |
|---|
| 539 | + struct nullb_device *dev = cmd->nq->dev; |
|---|
| 540 | + unsigned int zone_no; |
|---|
| 541 | + struct blk_zone *zone; |
|---|
| 542 | + blk_status_t ret; |
|---|
| 543 | + size_t i; |
|---|
| 544 | + |
|---|
| 545 | + if (op == REQ_OP_ZONE_RESET_ALL) { |
|---|
| 546 | + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { |
|---|
| 547 | + null_lock_zone(dev, i); |
|---|
| 548 | + zone = &dev->zones[i]; |
|---|
| 549 | + if (zone->cond != BLK_ZONE_COND_EMPTY) { |
|---|
| 550 | + null_reset_zone(dev, zone); |
|---|
| 551 | + trace_nullb_zone_op(cmd, i, zone->cond); |
|---|
| 552 | + } |
|---|
| 553 | + null_unlock_zone(dev, i); |
|---|
| 554 | + } |
|---|
| 555 | + return BLK_STS_OK; |
|---|
| 556 | + } |
|---|
| 557 | + |
|---|
| 558 | + zone_no = null_zone_no(dev, sector); |
|---|
| 559 | + zone = &dev->zones[zone_no]; |
|---|
| 560 | + |
|---|
| 561 | + null_lock_zone(dev, zone_no); |
|---|
| 562 | + |
|---|
| 563 | + switch (op) { |
|---|
| 564 | + case REQ_OP_ZONE_RESET: |
|---|
| 565 | + ret = null_reset_zone(dev, zone); |
|---|
| 566 | + break; |
|---|
| 567 | + case REQ_OP_ZONE_OPEN: |
|---|
| 568 | + ret = null_open_zone(dev, zone); |
|---|
| 569 | + break; |
|---|
| 570 | + case REQ_OP_ZONE_CLOSE: |
|---|
| 571 | + ret = null_close_zone(dev, zone); |
|---|
| 572 | + break; |
|---|
| 573 | + case REQ_OP_ZONE_FINISH: |
|---|
| 574 | + ret = null_finish_zone(dev, zone); |
|---|
| 575 | + break; |
|---|
| 576 | + default: |
|---|
| 577 | + ret = BLK_STS_NOTSUPP; |
|---|
| 578 | + break; |
|---|
| 579 | + } |
|---|
| 580 | + |
|---|
| 581 | + if (ret == BLK_STS_OK) |
|---|
| 582 | + trace_nullb_zone_op(cmd, zone_no, zone->cond); |
|---|
| 583 | + |
|---|
| 584 | + null_unlock_zone(dev, zone_no); |
|---|
| 585 | + |
|---|
| 586 | + return ret; |
|---|
| 587 | +} |
|---|
| 588 | + |
|---|
| 589 | +blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, |
|---|
| 590 | + sector_t sector, sector_t nr_sectors) |
|---|
| 591 | +{ |
|---|
| 592 | + struct nullb_device *dev = cmd->nq->dev; |
|---|
| 593 | + unsigned int zno = null_zone_no(dev, sector); |
|---|
| 594 | + blk_status_t sts; |
|---|
| 595 | + |
|---|
| 596 | + switch (op) { |
|---|
| 597 | + case REQ_OP_WRITE: |
|---|
| 598 | + sts = null_zone_write(cmd, sector, nr_sectors, false); |
|---|
| 599 | + break; |
|---|
| 600 | + case REQ_OP_ZONE_APPEND: |
|---|
| 601 | + sts = null_zone_write(cmd, sector, nr_sectors, true); |
|---|
| 602 | + break; |
|---|
| 603 | + case REQ_OP_ZONE_RESET: |
|---|
| 604 | + case REQ_OP_ZONE_RESET_ALL: |
|---|
| 605 | + case REQ_OP_ZONE_OPEN: |
|---|
| 606 | + case REQ_OP_ZONE_CLOSE: |
|---|
| 607 | + case REQ_OP_ZONE_FINISH: |
|---|
| 608 | + sts = null_zone_mgmt(cmd, op, sector); |
|---|
| 609 | + break; |
|---|
| 610 | + default: |
|---|
| 611 | + null_lock_zone(dev, zno); |
|---|
| 612 | + sts = null_process_cmd(cmd, op, sector, nr_sectors); |
|---|
| 613 | + null_unlock_zone(dev, zno); |
|---|
| 614 | + } |
|---|
| 615 | + |
|---|
| 616 | + return sts; |
|---|
| 154 | 617 | } |
|---|