.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
---|
1 | 2 | /* |
---|
| 3 | + * Copyright (C) 2018 Cambridge Greys Ltd |
---|
2 | 4 | * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com) |
---|
3 | 5 | * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) |
---|
4 | | - * Licensed under the GPL |
---|
5 | 6 | */ |
---|
6 | 7 | |
---|
7 | 8 | /* 2001-09-28...2002-04-17 |
---|
.. | .. |
---|
23 | 24 | #include <linux/module.h> |
---|
24 | 25 | #include <linux/init.h> |
---|
25 | 26 | #include <linux/blkdev.h> |
---|
| 27 | +#include <linux/blk-mq.h> |
---|
26 | 28 | #include <linux/ata.h> |
---|
27 | 29 | #include <linux/hdreg.h> |
---|
28 | 30 | #include <linux/cdrom.h> |
---|
.. | .. |
---|
42 | 44 | #include <os.h> |
---|
43 | 45 | #include "cow.h" |
---|
44 | 46 | |
---|
/* Max request size is determined by sector mask - 32K */
#define UBD_MAX_REQUEST (8 * sizeof(long))

/*
 * Per-segment I/O descriptor.  One io_desc covers a single contiguous
 * data buffer of a request, together with the COW bookkeeping needed to
 * commit it: which sectors go to the COW file (sector_mask), and the
 * bitmap words/offset to flush after a COW write.
 */
struct io_desc {
	char *buffer;			/* kernel data buffer; NULL for discard/write-zeroes */
	unsigned long length;		/* byte length of this segment */
	unsigned long sector_mask;	/* bit i set => sector i lives in the COW file */
	unsigned long long cow_offset;	/* file offset of bitmap words to update, -1 if none */
	unsigned long bitmap_words[2];	/* bitmap words to write back at cow_offset */
};

/*
 * One request as handed to the helper I/O thread over the pipe.
 * Shared between the kernel side and the helper thread, so it must be
 * self-contained (no pointers into per-CPU or stack data).
 */
struct io_thread_req {
	struct request *req;		/* originating blk-mq request */
	int fds[2];			/* [0] = COW file (or image), [1] = backing image */
	unsigned long offsets[2];	/* base offsets added per fds[] index */
	unsigned long long offset;	/* byte offset of this request in the device */
	int sectorsize;
	int error;			/* blk_status_t result filled in by the helper */

	int desc_cnt;			/* number of entries in io_desc[] */
	/* io_desc has to be the last element of the struct */
	struct io_desc io_desc[];
};
---|
61 | 70 | |
---|
62 | 71 | |
---|
.. | .. |
---|
111 | 120 | .open = ubd_open, |
---|
112 | 121 | .release = ubd_release, |
---|
113 | 122 | .ioctl = ubd_ioctl, |
---|
| 123 | + .compat_ioctl = blkdev_compat_ptr_ioctl, |
---|
114 | 124 | .getgeo = ubd_getgeo, |
---|
115 | 125 | }; |
---|
116 | 126 | |
---|
.. | .. |
---|
142 | 152 | #define MAX_SG 64 |
---|
143 | 153 | |
---|
144 | 154 | struct ubd { |
---|
145 | | - struct list_head restart; |
---|
146 | 155 | /* name (and fd, below) of the file opened for writing, either the |
---|
147 | 156 | * backing or the cow file. */ |
---|
148 | 157 | char *file; |
---|
.. | .. |
---|
153 | 162 | struct openflags openflags; |
---|
154 | 163 | unsigned shared:1; |
---|
155 | 164 | unsigned no_cow:1; |
---|
| 165 | + unsigned no_trim:1; |
---|
156 | 166 | struct cow cow; |
---|
157 | 167 | struct platform_device pdev; |
---|
158 | 168 | struct request_queue *queue; |
---|
| 169 | + struct blk_mq_tag_set tag_set; |
---|
159 | 170 | spinlock_t lock; |
---|
160 | | - struct scatterlist sg[MAX_SG]; |
---|
161 | | - struct request *request; |
---|
162 | | - int start_sg, end_sg; |
---|
163 | | - sector_t rq_pos; |
---|
164 | 171 | }; |
---|
165 | 172 | |
---|
166 | 173 | #define DEFAULT_COW { \ |
---|
.. | .. |
---|
179 | 186 | .boot_openflags = OPEN_FLAGS, \ |
---|
180 | 187 | .openflags = OPEN_FLAGS, \ |
---|
181 | 188 | .no_cow = 0, \ |
---|
| 189 | + .no_trim = 0, \ |
---|
182 | 190 | .shared = 0, \ |
---|
183 | 191 | .cow = DEFAULT_COW, \ |
---|
184 | 192 | .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ |
---|
185 | | - .request = NULL, \ |
---|
186 | | - .start_sg = 0, \ |
---|
187 | | - .end_sg = 0, \ |
---|
188 | | - .rq_pos = 0, \ |
---|
189 | 193 | } |
---|
190 | 194 | |
---|
191 | 195 | /* Protected by ubd_lock */ |
---|
.. | .. |
---|
195 | 199 | static int fake_ide = 0; |
---|
196 | 200 | static struct proc_dir_entry *proc_ide_root = NULL; |
---|
197 | 201 | static struct proc_dir_entry *proc_ide = NULL; |
---|
| 202 | + |
---|
| 203 | +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, |
---|
| 204 | + const struct blk_mq_queue_data *bd); |
---|
198 | 205 | |
---|
199 | 206 | static void make_proc_ide(void) |
---|
200 | 207 | { |
---|
.. | .. |
---|
277 | 284 | str++; |
---|
278 | 285 | if(!strcmp(str, "sync")){ |
---|
279 | 286 | global_openflags = of_sync(global_openflags); |
---|
280 | | - goto out1; |
---|
| 287 | + return err; |
---|
281 | 288 | } |
---|
282 | 289 | |
---|
283 | 290 | err = -EINVAL; |
---|
284 | 291 | major = simple_strtoul(str, &end, 0); |
---|
285 | 292 | if((*end != '\0') || (end == str)){ |
---|
286 | 293 | *error_out = "Didn't parse major number"; |
---|
287 | | - goto out1; |
---|
| 294 | + return err; |
---|
288 | 295 | } |
---|
289 | 296 | |
---|
290 | 297 | mutex_lock(&ubd_lock); |
---|
.. | .. |
---|
326 | 333 | *index_out = n; |
---|
327 | 334 | |
---|
328 | 335 | err = -EINVAL; |
---|
329 | | - for (i = 0; i < sizeof("rscd="); i++) { |
---|
| 336 | + for (i = 0; i < sizeof("rscdt="); i++) { |
---|
330 | 337 | switch (*str) { |
---|
331 | 338 | case 'r': |
---|
332 | 339 | flags.w = 0; |
---|
.. | .. |
---|
340 | 347 | case 'c': |
---|
341 | 348 | ubd_dev->shared = 1; |
---|
342 | 349 | break; |
---|
| 350 | + case 't': |
---|
| 351 | + ubd_dev->no_trim = 1; |
---|
| 352 | + break; |
---|
343 | 353 | case '=': |
---|
344 | 354 | str++; |
---|
345 | 355 | goto break_loop; |
---|
346 | 356 | default: |
---|
347 | 357 | *error_out = "Expected '=' or flag letter " |
---|
348 | | - "(r, s, c, or d)"; |
---|
| 358 | + "(r, s, c, t or d)"; |
---|
349 | 359 | goto out; |
---|
350 | 360 | } |
---|
351 | 361 | str++; |
---|
.. | .. |
---|
418 | 428 | " 'c' will cause the device to be treated as being shared between multiple\n" |
---|
419 | 429 | " UMLs and file locking will be turned off - this is appropriate for a\n" |
---|
420 | 430 | " cluster filesystem and inappropriate at almost all other times.\n\n" |
---|
| 431 | +" 't' will disable trim/discard support on the device (enabled by default).\n\n" |
---|
421 | 432 | ); |
---|
422 | 433 | |
---|
423 | 434 | static int udb_setup(char *str) |
---|
.. | .. |
---|
436 | 447 | " in the boot output.\n\n" |
---|
437 | 448 | ); |
---|
438 | 449 | |
---|
439 | | -static void do_ubd_request(struct request_queue * q); |
---|
440 | | - |
---|
441 | 450 | /* Only changed by ubd_init, which is an initcall. */ |
---|
442 | 451 | static int thread_fd = -1; |
---|
443 | | -static LIST_HEAD(restart); |
---|
444 | 452 | |
---|
445 | 453 | /* Function to read several request pointers at a time |
---|
446 | 454 | * handling fractional reads if (and as) needed |
---|
.. | .. |
---|
498 | 506 | /* Called without dev->lock held, and only in interrupt context. */ |
---|
499 | 507 | static void ubd_handler(void) |
---|
500 | 508 | { |
---|
501 | | - struct ubd *ubd; |
---|
502 | | - struct list_head *list, *next_ele; |
---|
503 | | - unsigned long flags; |
---|
504 | 509 | int n; |
---|
505 | 510 | int count; |
---|
506 | 511 | |
---|
.. | .. |
---|
520 | 525 | return; |
---|
521 | 526 | } |
---|
522 | 527 | for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { |
---|
523 | | - blk_end_request( |
---|
524 | | - (*irq_req_buffer)[count]->req, |
---|
525 | | - BLK_STS_OK, |
---|
526 | | - (*irq_req_buffer)[count]->length |
---|
527 | | - ); |
---|
528 | | - kfree((*irq_req_buffer)[count]); |
---|
529 | | - } |
---|
530 | | - } |
---|
531 | | - reactivate_fd(thread_fd, UBD_IRQ); |
---|
| 528 | + struct io_thread_req *io_req = (*irq_req_buffer)[count]; |
---|
532 | 529 | |
---|
533 | | - list_for_each_safe(list, next_ele, &restart){ |
---|
534 | | - ubd = container_of(list, struct ubd, restart); |
---|
535 | | - list_del_init(&ubd->restart); |
---|
536 | | - spin_lock_irqsave(&ubd->lock, flags); |
---|
537 | | - do_ubd_request(ubd->queue); |
---|
538 | | - spin_unlock_irqrestore(&ubd->lock, flags); |
---|
| 530 | + if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { |
---|
| 531 | + blk_queue_max_discard_sectors(io_req->req->q, 0); |
---|
| 532 | + blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); |
---|
| 533 | + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); |
---|
| 534 | + } |
---|
| 535 | + blk_mq_end_request(io_req->req, io_req->error); |
---|
| 536 | + kfree(io_req); |
---|
| 537 | + } |
---|
539 | 538 | } |
---|
540 | 539 | } |
---|
541 | 540 | |
---|
.. | .. |
---|
565 | 564 | __u32 version; |
---|
566 | 565 | __u32 align; |
---|
567 | 566 | char *backing_file; |
---|
568 | | - time_t mtime; |
---|
| 567 | + time64_t mtime; |
---|
569 | 568 | unsigned long long size; |
---|
570 | 569 | int sector_size; |
---|
571 | 570 | int bitmap_offset; |
---|
.. | .. |
---|
604 | 603 | return 0; |
---|
605 | 604 | } |
---|
606 | 605 | |
---|
607 | | -static int backing_file_mismatch(char *file, __u64 size, time_t mtime) |
---|
| 606 | +static int backing_file_mismatch(char *file, __u64 size, time64_t mtime) |
---|
608 | 607 | { |
---|
609 | | - unsigned long modtime; |
---|
| 608 | + time64_t modtime; |
---|
610 | 609 | unsigned long long actual; |
---|
611 | 610 | int err; |
---|
612 | 611 | |
---|
.. | .. |
---|
632 | 631 | return -EINVAL; |
---|
633 | 632 | } |
---|
634 | 633 | if (modtime != mtime) { |
---|
635 | | - printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs " |
---|
| 634 | + printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs " |
---|
636 | 635 | "backing file\n", mtime, modtime); |
---|
637 | 636 | return -EINVAL; |
---|
638 | 637 | } |
---|
.. | .. |
---|
675 | 674 | unsigned long *bitmap_len_out, int *data_offset_out, |
---|
676 | 675 | int *create_cow_out) |
---|
677 | 676 | { |
---|
678 | | - time_t mtime; |
---|
| 677 | + time64_t mtime; |
---|
679 | 678 | unsigned long long size; |
---|
680 | 679 | __u32 version, align; |
---|
681 | 680 | char *backing_file; |
---|
.. | .. |
---|
805 | 804 | |
---|
806 | 805 | if((fd == -ENOENT) && create_cow){ |
---|
807 | 806 | fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, |
---|
808 | | - ubd_dev->openflags, 1 << 9, PAGE_SIZE, |
---|
| 807 | + ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE, |
---|
809 | 808 | &ubd_dev->cow.bitmap_offset, |
---|
810 | 809 | &ubd_dev->cow.bitmap_len, |
---|
811 | 810 | &ubd_dev->cow.data_offset); |
---|
.. | .. |
---|
846 | 845 | if(err < 0) goto error; |
---|
847 | 846 | ubd_dev->cow.fd = err; |
---|
848 | 847 | } |
---|
| 848 | + if (ubd_dev->no_trim == 0) { |
---|
| 849 | + ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE; |
---|
| 850 | + ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE; |
---|
| 851 | + blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); |
---|
| 852 | + blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); |
---|
| 853 | + blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue); |
---|
| 854 | + } |
---|
| 855 | + blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); |
---|
849 | 856 | return 0; |
---|
850 | 857 | error: |
---|
851 | 858 | os_close_file(ubd_dev->fd); |
---|
.. | .. |
---|
857 | 864 | struct ubd *ubd_dev = dev_get_drvdata(dev); |
---|
858 | 865 | |
---|
859 | 866 | blk_cleanup_queue(ubd_dev->queue); |
---|
| 867 | + blk_mq_free_tag_set(&ubd_dev->tag_set); |
---|
860 | 868 | *ubd_dev = ((struct ubd) DEFAULT_UBD); |
---|
861 | 869 | } |
---|
862 | 870 | |
---|
.. | .. |
---|
891 | 899 | |
---|
892 | 900 | disk->private_data = &ubd_devs[unit]; |
---|
893 | 901 | disk->queue = ubd_devs[unit].queue; |
---|
894 | | - device_add_disk(parent, disk); |
---|
| 902 | + device_add_disk(parent, disk, NULL); |
---|
895 | 903 | |
---|
896 | 904 | *disk_out = disk; |
---|
897 | 905 | return 0; |
---|
898 | 906 | } |
---|
899 | 907 | |
---|
900 | | -#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) |
---|
| 908 | +#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) |
---|
| 909 | + |
---|
| 910 | +static const struct blk_mq_ops ubd_mq_ops = { |
---|
| 911 | + .queue_rq = ubd_queue_rq, |
---|
| 912 | +}; |
---|
901 | 913 | |
---|
902 | 914 | static int ubd_add(int n, char **error_out) |
---|
903 | 915 | { |
---|
.. | .. |
---|
915 | 927 | |
---|
916 | 928 | ubd_dev->size = ROUND_BLOCK(ubd_dev->size); |
---|
917 | 929 | |
---|
918 | | - INIT_LIST_HEAD(&ubd_dev->restart); |
---|
919 | | - sg_init_table(ubd_dev->sg, MAX_SG); |
---|
| 930 | + ubd_dev->tag_set.ops = &ubd_mq_ops; |
---|
| 931 | + ubd_dev->tag_set.queue_depth = 64; |
---|
| 932 | + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; |
---|
| 933 | + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; |
---|
| 934 | + ubd_dev->tag_set.driver_data = ubd_dev; |
---|
| 935 | + ubd_dev->tag_set.nr_hw_queues = 1; |
---|
920 | 936 | |
---|
921 | | - err = -ENOMEM; |
---|
922 | | - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); |
---|
923 | | - if (ubd_dev->queue == NULL) { |
---|
924 | | - *error_out = "Failed to initialize device queue"; |
---|
| 937 | + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); |
---|
| 938 | + if (err) |
---|
925 | 939 | goto out; |
---|
| 940 | + |
---|
| 941 | + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); |
---|
| 942 | + if (IS_ERR(ubd_dev->queue)) { |
---|
| 943 | + err = PTR_ERR(ubd_dev->queue); |
---|
| 944 | + goto out_cleanup_tags; |
---|
926 | 945 | } |
---|
| 946 | + |
---|
927 | 947 | ubd_dev->queue->queuedata = ubd_dev; |
---|
928 | 948 | blk_queue_write_cache(ubd_dev->queue, true, false); |
---|
929 | 949 | |
---|
930 | 950 | blk_queue_max_segments(ubd_dev->queue, MAX_SG); |
---|
| 951 | + blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); |
---|
931 | 952 | err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); |
---|
932 | 953 | if(err){ |
---|
933 | 954 | *error_out = "Failed to register device"; |
---|
934 | | - goto out_cleanup; |
---|
| 955 | + goto out_cleanup_tags; |
---|
935 | 956 | } |
---|
936 | 957 | |
---|
937 | 958 | if (fake_major != UBD_MAJOR) |
---|
.. | .. |
---|
949 | 970 | out: |
---|
950 | 971 | return err; |
---|
951 | 972 | |
---|
952 | | -out_cleanup: |
---|
953 | | - blk_cleanup_queue(ubd_dev->queue); |
---|
| 973 | +out_cleanup_tags: |
---|
| 974 | + blk_mq_free_tag_set(&ubd_dev->tag_set); |
---|
| 975 | + if (!(IS_ERR(ubd_dev->queue))) |
---|
| 976 | + blk_cleanup_queue(ubd_dev->queue); |
---|
954 | 977 | goto out; |
---|
955 | 978 | } |
---|
956 | 979 | |
---|
.. | .. |
---|
1235 | 1258 | __u64 bitmap_offset, unsigned long *bitmap_words, |
---|
1236 | 1259 | __u64 bitmap_len) |
---|
1237 | 1260 | { |
---|
1238 | | - __u64 sector = io_offset >> 9; |
---|
| 1261 | + __u64 sector = io_offset >> SECTOR_SHIFT; |
---|
1239 | 1262 | int i, update_bitmap = 0; |
---|
1240 | 1263 | |
---|
1241 | | - for(i = 0; i < length >> 9; i++){ |
---|
| 1264 | + for (i = 0; i < length >> SECTOR_SHIFT; i++) { |
---|
1242 | 1265 | if(cow_mask != NULL) |
---|
1243 | 1266 | ubd_set_bit(i, (unsigned char *) cow_mask); |
---|
1244 | 1267 | if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) |
---|
.. | .. |
---|
1269 | 1292 | *cow_offset += bitmap_offset; |
---|
1270 | 1293 | } |
---|
1271 | 1294 | |
---|
/*
 * Prepare one segment of a request for COW operation.
 *
 * For reads: mark in segment->sector_mask which sectors are already
 * present in the COW file (per the backing bitmap), so do_io() fetches
 * them from the COW file and the rest from the backing file.
 * For writes: delegate to cowify_bitmap(), which marks all sectors as
 * COW-resident and records the bitmap words to flush afterwards.
 *
 * @offset is the byte offset of this segment within the device (the
 * request may span several segments, each cowified separately).
 */
static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
		       unsigned long offset, unsigned long *bitmap,
		       __u64 bitmap_offset, __u64 bitmap_len)
{
	__u64 sector = offset >> SECTOR_SHIFT;
	int i;

	/* sector_mask is one machine word: a segment may cover at most
	 * 8 * sizeof(long) sectors (the 32K UBD_MAX_REQUEST limit). */
	if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
		panic("Operation too long");

	if (req_op(req->req) == REQ_OP_READ) {
		for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
			if (ubd_test_bit(sector + i, (unsigned char *) bitmap))
				ubd_set_bit(i, (unsigned char *)
					    &segment->sector_mask);
		}
	} else {
		cowify_bitmap(offset, segment->length, &segment->sector_mask,
			      &segment->cow_offset, bitmap, bitmap_offset,
			      segment->bitmap_words, bitmap_len);
	}
}
---|
| 1317 | + |
---|
/*
 * Fill in the io_desc[] array of @io_req from the blk-mq @req.
 *
 * Discard/write-zeroes carry no data pages, so they get a single
 * descriptor with a NULL buffer and the total byte count.  All other
 * ops get one descriptor per bio segment, pointing straight at the
 * page data (UML is always lowmem, so page_address() is valid).
 *
 * If a COW file is attached, each descriptor is additionally cowified
 * with its running byte offset within the request.
 */
static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
			struct request *req)
{
	struct bio_vec bvec;
	struct req_iterator iter;
	int i = 0;
	unsigned long byte_offset = io_req->offset;
	int op = req_op(req);

	if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
		io_req->io_desc[0].buffer = NULL;
		io_req->io_desc[0].length = blk_rq_bytes(req);
	} else {
		rq_for_each_segment(bvec, req, iter) {
			/* desc_cnt came from blk_rq_nr_phys_segments();
			 * overrunning it would corrupt the allocation. */
			BUG_ON(i >= io_req->desc_cnt);

			io_req->io_desc[i].buffer =
				page_address(bvec.bv_page) + bvec.bv_offset;
			io_req->io_desc[i].length = bvec.bv_len;
			i++;
		}
	}

	if (dev->cow.file) {
		for (i = 0; i < io_req->desc_cnt; i++) {
			cowify_req(io_req, &io_req->io_desc[i], byte_offset,
				   dev->cow.bitmap, dev->cow.bitmap_offset,
				   dev->cow.bitmap_len);
			byte_offset += io_req->io_desc[i].length;
		}
	}
}
---|
1352 | 1351 | |
---|
1353 | | -/* Called with dev->lock held */ |
---|
1354 | | -static void do_ubd_request(struct request_queue *q) |
---|
/*
 * Allocate and initialize an io_thread_req with room for @desc_cnt
 * trailing io_desc entries (flexible array member).
 *
 * GFP_ATOMIC because this runs from the blk-mq ->queue_rq path under
 * the device spinlock.  Returns NULL on allocation failure, which the
 * caller maps to BLK_STS_RESOURCE.
 *
 * fds[0] is the file normally read from/written to (the COW file when
 * one is attached, otherwise the image); fds[1] is always the backing
 * image, used for COW misses and bitmap updates.
 */
static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
					   int desc_cnt)
{
	struct io_thread_req *io_req;
	int i;

	io_req = kmalloc(sizeof(*io_req) +
			 (desc_cnt * sizeof(struct io_desc)),
			 GFP_ATOMIC);
	if (!io_req)
		return NULL;

	io_req->req = req;
	if (dev->cow.file)
		io_req->fds[0] = dev->cow.fd;
	else
		io_req->fds[0] = dev->fd;
	io_req->error = 0;
	io_req->sectorsize = SECTOR_SIZE;
	io_req->fds[1] = dev->fd;
	io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
	io_req->offsets[0] = 0;
	io_req->offsets[1] = dev->cow.data_offset;

	/* Default: no COW involvement; cowify_req() overrides as needed. */
	for (i = 0 ; i < desc_cnt; i++) {
		io_req->io_desc[i].sector_mask = 0;
		io_req->io_desc[i].cow_offset = -1;
	}

	return io_req;
}
---|
| 1383 | + |
---|
/*
 * Build an io_thread_req for @req and hand it to the helper I/O thread
 * via the thread_fd pipe.
 *
 * Segment count: FLUSH carries no data (0), discard/write-zeroes use a
 * single synthetic descriptor (1), everything else one per physical
 * segment.  Returns sizeof(io_req) (the pipe write size) on success, a
 * negative error on failure; -EAGAIN means the pipe is full and the
 * caller should back off (BLK_STS_DEV_RESOURCE).
 */
static int ubd_submit_request(struct ubd *dev, struct request *req)
{
	int segs = 0;
	struct io_thread_req *io_req;
	int ret;
	int op = req_op(req);

	if (op == REQ_OP_FLUSH)
		segs = 0;
	else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
		segs = 1;
	else
		segs = blk_rq_nr_phys_segments(req);

	io_req = ubd_alloc_req(dev, req, segs);
	if (!io_req)
		return -ENOMEM;

	io_req->desc_cnt = segs;
	if (segs)
		ubd_map_req(dev, io_req, req);

	/* Note: only the *pointer* crosses the pipe; the helper thread
	 * accesses the request through shared memory. */
	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
	if (ret != sizeof(io_req)) {
		if (ret != -EAGAIN)
			pr_err("write to io thread failed: %d\n", -ret);
		kfree(io_req);
	}
	return ret;
}
---|
| 1414 | + |
---|
/*
 * blk-mq ->queue_rq handler: dispatch one request to the I/O thread.
 *
 * The device spinlock serializes submissions onto the pipe.  Completion
 * is asynchronous: ubd_handler() calls blk_mq_end_request() when the
 * helper thread returns the request.  ENOMEM maps to BLK_STS_RESOURCE
 * and any other submit failure (notably -EAGAIN on a full pipe) to
 * BLK_STS_DEV_RESOURCE, so blk-mq requeues the request.
 */
static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd)
{
	struct ubd *ubd_dev = hctx->queue->queuedata;
	struct request *req = bd->rq;
	int ret = 0, res = BLK_STS_OK;

	blk_mq_start_request(req);

	spin_lock_irq(&ubd_dev->lock);

	switch (req_op(req)) {
	case REQ_OP_FLUSH:
	case REQ_OP_READ:
	case REQ_OP_WRITE:
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		ret = ubd_submit_request(ubd_dev, req);
		break;
	default:
		WARN_ON_ONCE(1);
		res = BLK_STS_NOTSUPP;
	}

	spin_unlock_irq(&ubd_dev->lock);

	if (ret < 0) {
		if (ret == -ENOMEM)
			res = BLK_STS_RESOURCE;
		else
			res = BLK_STS_DEV_RESOURCE;
	}

	return res;
}
---|
1411 | 1450 | |
---|
1412 | 1451 | static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
---|
.. | .. |
---|
1451 | 1490 | return -EINVAL; |
---|
1452 | 1491 | } |
---|
1453 | 1492 | |
---|
1454 | | -static int update_bitmap(struct io_thread_req *req) |
---|
| 1493 | +static int map_error(int error_code) |
---|
| 1494 | +{ |
---|
| 1495 | + switch (error_code) { |
---|
| 1496 | + case 0: |
---|
| 1497 | + return BLK_STS_OK; |
---|
| 1498 | + case ENOSYS: |
---|
| 1499 | + case EOPNOTSUPP: |
---|
| 1500 | + return BLK_STS_NOTSUPP; |
---|
| 1501 | + case ENOSPC: |
---|
| 1502 | + return BLK_STS_NOSPC; |
---|
| 1503 | + } |
---|
| 1504 | + return BLK_STS_IOERR; |
---|
| 1505 | +} |
---|
| 1506 | + |
---|
| 1507 | +/* |
---|
| 1508 | + * Everything from here onwards *IS NOT PART OF THE KERNEL* |
---|
| 1509 | + * |
---|
| 1510 | + * The following functions are part of UML hypervisor code. |
---|
| 1511 | + * All functions from here onwards are executed as a helper |
---|
| 1512 | + * thread and are not allowed to execute any kernel functions. |
---|
| 1513 | + * |
---|
| 1514 | + * Any communication must occur strictly via shared memory and IPC. |
---|
| 1515 | + * |
---|
| 1516 | + * Do not add printks, locks, kernel memory operations, etc - it |
---|
| 1517 | + * will result in unpredictable behaviour and/or crashes. |
---|
| 1518 | + */ |
---|
| 1519 | + |
---|
/*
 * Flush the COW bitmap words recorded for @segment back to the backing
 * file (fds[1]).  A cow_offset of -1 means no bitmap change is pending
 * for this segment.  Returns a blk_status_t via map_error().
 *
 * Runs in the helper thread: no printk/locks allowed here.
 */
static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
{
	int n;

	if (segment->cow_offset == -1)
		return map_error(0);

	n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
			   sizeof(segment->bitmap_words), segment->cow_offset);
	if (n != sizeof(segment->bitmap_words))
		return map_error(-n);

	return map_error(0);
}
---|
1471 | 1534 | |
---|
/*
 * Execute one io_desc segment of a request against the host files.
 *
 * The segment is walked in runs of sectors whose sector_mask bit is
 * equal: bit selects fds[bit]/offsets[bit], i.e. whether the run hits
 * the COW file or the backing image.  Errors are reported through
 * req->error (blk_status_t) and stop processing.
 *
 * Runs in the helper thread: must not call kernel functions (only the
 * os_* host wrappers and shared-memory access).
 */
static void do_io(struct io_thread_req *req, struct io_desc *desc)
{
	char *buf = NULL;
	unsigned long len;
	int n, nsectors, start, end, bit;
	__u64 off;

	/* FLUSH is really a special case, we cannot "case" it with others */

	if (req_op(req->req) == REQ_OP_FLUSH) {
		/* fds[0] is always either the rw image or our cow file */
		req->error = map_error(-os_sync_file(req->fds[0]));
		return;
	}

	nsectors = desc->length / req->sectorsize;
	start = 0;
	do {
		/* Find the run [start, end) with a uniform COW bit. */
		bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
		end = start;
		while((end < nsectors) &&
		      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
			end++;

		off = req->offset + req->offsets[bit] +
			start * req->sectorsize;
		len = (end - start) * req->sectorsize;
		/* Discard/write-zeroes segments carry no data buffer. */
		if (desc->buffer != NULL)
			buf = &desc->buffer[start * req->sectorsize];

		switch (req_op(req->req)) {
		case REQ_OP_READ:
			/* Loop to cope with short reads; zero-fill any
			 * tail past EOF (sparse/short backing file). */
			n = 0;
			do {
				buf = &buf[n];
				len -= n;
				n = os_pread_file(req->fds[bit], buf, len, off);
				if (n < 0) {
					req->error = map_error(-n);
					return;
				}
			} while((n < len) && (n != 0));
			if (n < len) memset(&buf[n], 0, len - n);
			break;
		case REQ_OP_WRITE:
			n = os_pwrite_file(req->fds[bit], buf, len, off);
			if(n != len){
				req->error = map_error(-n);
				return;
			}
			break;
		case REQ_OP_DISCARD:
		case REQ_OP_WRITE_ZEROES:
			/* Punch a hole on the host; ENOTSUPP propagates
			 * up and disables discard on the queue. */
			n = os_falloc_punch(req->fds[bit], off, len);
			if (n) {
				req->error = map_error(-n);
				return;
			}
			break;
		default:
			WARN_ON_ONCE(1);
			req->error = BLK_STS_NOTSUPP;
			return;
		}

		start = end;
	} while(start < nsectors);

	/* NOTE(review): this advances offset by the *last* run's len, not
	 * by desc->length; if sector_mask ever splits a segment into more
	 * than one run, the next descriptor would start at the wrong
	 * offset — confirm against desc->length accounting. */
	req->offset += len;
	req->error = update_bitmap(req, desc);
}
---|
1534 | 1606 | |
---|
1535 | 1607 | /* Changed in start_io_thread, which is serialized by being called only |
---|
.. | .. |
---|
1554 | 1626 | &io_remainder_size, |
---|
1555 | 1627 | UBD_REQ_BUFFER_SIZE |
---|
1556 | 1628 | ); |
---|
1557 | | - if (n < 0) { |
---|
1558 | | - if (n == -EAGAIN) { |
---|
| 1629 | + if (n <= 0) { |
---|
| 1630 | + if (n == -EAGAIN) |
---|
1559 | 1631 | ubd_read_poll(-1); |
---|
1560 | | - continue; |
---|
1561 | | - } else { |
---|
1562 | | - printk("io_thread - read failed, fd = %d, " |
---|
1563 | | - "err = %d," |
---|
1564 | | - "reminder = %d\n", |
---|
1565 | | - kernel_fd, -n, io_remainder_size); |
---|
1566 | | - } |
---|
| 1632 | + |
---|
| 1633 | + continue; |
---|
1567 | 1634 | } |
---|
1568 | 1635 | |
---|
1569 | 1636 | for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { |
---|
| 1637 | + struct io_thread_req *req = (*io_req_buffer)[count]; |
---|
| 1638 | + int i; |
---|
| 1639 | + |
---|
1570 | 1640 | io_count++; |
---|
1571 | | - do_io((*io_req_buffer)[count]); |
---|
| 1641 | + for (i = 0; !req->error && i < req->desc_cnt; i++) |
---|
| 1642 | + do_io(req, &(req->io_desc[i])); |
---|
| 1643 | + |
---|
1572 | 1644 | } |
---|
1573 | 1645 | |
---|
1574 | 1646 | written = 0; |
---|
.. | .. |
---|
1579 | 1651 | n - written); |
---|
1580 | 1652 | if (res >= 0) { |
---|
1581 | 1653 | written += res; |
---|
1582 | | - } else { |
---|
1583 | | - if (res != -EAGAIN) { |
---|
1584 | | - printk("io_thread - write failed, fd = %d, " |
---|
1585 | | - "err = %d\n", kernel_fd, -n); |
---|
1586 | | - } |
---|
1587 | 1654 | } |
---|
1588 | 1655 | if (written < n) { |
---|
1589 | 1656 | ubd_write_poll(-1); |
---|