2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/arch/um/drivers/ubd_kern.c
@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2018 Cambridge Greys Ltd
  * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 /* 2001-09-28...2002-04-17
@@ -23,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/cdrom.h>
@@ -42,21 +44,28 @@
 #include <os.h>
 #include "cow.h"
 
-enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
+/* Max request size is determined by sector mask - 32K */
+#define UBD_MAX_REQUEST (8 * sizeof(long))
 
-struct io_thread_req {
-	struct request *req;
-	enum ubd_req op;
-	int fds[2];
-	unsigned long offsets[2];
-	unsigned long long offset;
-	unsigned long length;
+struct io_desc {
 	char *buffer;
-	int sectorsize;
+	unsigned long length;
 	unsigned long sector_mask;
 	unsigned long long cow_offset;
 	unsigned long bitmap_words[2];
+};
+
+struct io_thread_req {
+	struct request *req;
+	int fds[2];
+	unsigned long offsets[2];
+	unsigned long long offset;
+	int sectorsize;
 	int error;
+
+	int desc_cnt;
+	/* io_desc has to be the last element of the struct */
+	struct io_desc io_desc[];
 };
 
 
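The 32K in the new comment follows from the mask width: io_desc's sector_mask is a single unsigned long, one bit per 512-byte sector, so one descriptor can cover at most 8 * sizeof(long) sectors. A standalone userspace sketch of the arithmetic (illustrative only, not driver code):

	#include <stdio.h>

	int main(void)
	{
		/* One mask bit per 512-byte sector; on a 64-bit host
		 * 8 * sizeof(long) = 64 sectors = 32K per descriptor. */
		unsigned long max_sectors = 8 * sizeof(long); /* UBD_MAX_REQUEST */

		printf("max sectors per descriptor: %lu\n", max_sectors);
		printf("max descriptor size: %luK\n", (max_sectors * 512) >> 10);
		return 0;
	}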
@@ -111,6 +120,7 @@
 	.open = ubd_open,
 	.release = ubd_release,
 	.ioctl = ubd_ioctl,
+	.compat_ioctl = blkdev_compat_ptr_ioctl,
 	.getgeo = ubd_getgeo,
 };
 
@@ -142,7 +152,6 @@
 #define MAX_SG 64
 
 struct ubd {
-	struct list_head restart;
 	/* name (and fd, below) of the file opened for writing, either the
 	 * backing or the cow file. */
 	char *file;
@@ -153,14 +162,12 @@
 	struct openflags openflags;
 	unsigned shared:1;
 	unsigned no_cow:1;
+	unsigned no_trim:1;
 	struct cow cow;
 	struct platform_device pdev;
 	struct request_queue *queue;
+	struct blk_mq_tag_set tag_set;
 	spinlock_t lock;
-	struct scatterlist sg[MAX_SG];
-	struct request *request;
-	int start_sg, end_sg;
-	sector_t rq_pos;
 };
 
 #define DEFAULT_COW { \
@@ -179,13 +186,10 @@
 	.boot_openflags = OPEN_FLAGS, \
 	.openflags = OPEN_FLAGS, \
 	.no_cow = 0, \
+	.no_trim = 0, \
 	.shared = 0, \
 	.cow = DEFAULT_COW, \
 	.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
-	.request = NULL, \
-	.start_sg = 0, \
-	.end_sg = 0, \
-	.rq_pos = 0, \
 }
 
 /* Protected by ubd_lock */
@@ -195,6 +199,9 @@
 static int fake_ide = 0;
 static struct proc_dir_entry *proc_ide_root = NULL;
 static struct proc_dir_entry *proc_ide = NULL;
+
+static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
+				 const struct blk_mq_queue_data *bd);
 
 static void make_proc_ide(void)
 {
@@ -277,14 +284,14 @@
 		str++;
 		if(!strcmp(str, "sync")){
 			global_openflags = of_sync(global_openflags);
-			goto out1;
+			return err;
 		}
 
 		err = -EINVAL;
 		major = simple_strtoul(str, &end, 0);
 		if((*end != '\0') || (end == str)){
 			*error_out = "Didn't parse major number";
-			goto out1;
+			return err;
 		}
 
 		mutex_lock(&ubd_lock);
@@ -326,7 +333,7 @@
 	*index_out = n;
 
 	err = -EINVAL;
-	for (i = 0; i < sizeof("rscd="); i++) {
+	for (i = 0; i < sizeof("rscdt="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -340,12 +347,15 @@
 		case 'c':
 			ubd_dev->shared = 1;
 			break;
+		case 't':
+			ubd_dev->no_trim = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
 			*error_out = "Expected '=' or flag letter "
-				"(r, s, c, or d)";
+				"(r, s, c, t or d)";
 			goto out;
 		}
 		str++;
@@ -418,6 +428,7 @@
 " 'c' will cause the device to be treated as being shared between multiple\n"
 " UMLs and file locking will be turned off - this is appropriate for a\n"
 " cluster filesystem and inappropriate at almost all other times.\n\n"
+" 't' will disable trim/discard support on the device (enabled by default).\n\n"
 );
 
 static int udb_setup(char *str)
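With the new 't' letter, discard can be switched off per device from the kernel command line; flag letters sit between the device number and the '=', exactly as the sizeof("rscdt=") parser above scans them. A hypothetical boot line (the image name is made up):

	ubd0t=root_fs.img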
@@ -436,11 +447,8 @@
 "    in the boot output.\n\n"
 );
 
-static void do_ubd_request(struct request_queue * q);
-
 /* Only changed by ubd_init, which is an initcall. */
 static int thread_fd = -1;
-static LIST_HEAD(restart);
 
 /* Function to read several request pointers at a time
  * handling fractional reads if (and as) needed
@@ -498,9 +506,6 @@
 /* Called without dev->lock held, and only in interrupt context. */
 static void ubd_handler(void)
 {
-	struct ubd *ubd;
-	struct list_head *list, *next_ele;
-	unsigned long flags;
 	int n;
 	int count;
 
@@ -520,22 +525,16 @@
 			return;
 		}
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
-			blk_end_request(
-				(*irq_req_buffer)[count]->req,
-				BLK_STS_OK,
-				(*irq_req_buffer)[count]->length
-			);
-			kfree((*irq_req_buffer)[count]);
-		}
-	}
-	reactivate_fd(thread_fd, UBD_IRQ);
+			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-	list_for_each_safe(list, next_ele, &restart){
-		ubd = container_of(list, struct ubd, restart);
-		list_del_init(&ubd->restart);
-		spin_lock_irqsave(&ubd->lock, flags);
-		do_ubd_request(ubd->queue);
-		spin_unlock_irqrestore(&ubd->lock, flags);
+			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
+				blk_queue_max_discard_sectors(io_req->req->q, 0);
+				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
+				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
+			}
+			blk_mq_end_request(io_req->req, io_req->error);
+			kfree(io_req);
+		}
 	}
 }
 
@@ -565,7 +564,7 @@
 	__u32 version;
 	__u32 align;
 	char *backing_file;
-	time_t mtime;
+	time64_t mtime;
 	unsigned long long size;
 	int sector_size;
 	int bitmap_offset;
@@ -604,9 +603,9 @@
 	return 0;
 }
 
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 {
-	unsigned long modtime;
+	time64_t modtime;
 	unsigned long long actual;
 	int err;
 
@@ -632,7 +631,7 @@
 		return -EINVAL;
 	}
 	if (modtime != mtime) {
-		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
+		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 		       "backing file\n", mtime, modtime);
 		return -EINVAL;
 	}
@@ -675,7 +674,7 @@
 			 unsigned long *bitmap_len_out, int *data_offset_out,
 			 int *create_cow_out)
 {
-	time_t mtime;
+	time64_t mtime;
 	unsigned long long size;
 	__u32 version, align;
 	char *backing_file;
@@ -805,7 +804,7 @@
 
 	if((fd == -ENOENT) && create_cow){
 		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
-				     ubd_dev->openflags, 1 << 9, PAGE_SIZE,
+				     ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 				     &ubd_dev->cow.bitmap_offset,
 				     &ubd_dev->cow.bitmap_len,
 				     &ubd_dev->cow.data_offset);
@@ -846,6 +845,14 @@
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	if (ubd_dev->no_trim == 0) {
+		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+	}
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
 	os_close_file(ubd_dev->fd);
@@ -857,6 +864,7 @@
 	struct ubd *ubd_dev = dev_get_drvdata(dev);
 
 	blk_cleanup_queue(ubd_dev->queue);
+	blk_mq_free_tag_set(&ubd_dev->tag_set);
 	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 }
 
@@ -891,13 +899,17 @@
 
 	disk->private_data = &ubd_devs[unit];
 	disk->queue = ubd_devs[unit].queue;
-	device_add_disk(parent, disk);
+	device_add_disk(parent, disk, NULL);
 
 	*disk_out = disk;
 	return 0;
 }
 
-#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
+#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
+
+static const struct blk_mq_ops ubd_mq_ops = {
+	.queue_rq = ubd_queue_rq,
+};
 
 static int ubd_add(int n, char **error_out)
 {
@@ -915,23 +927,32 @@
 
 	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 
-	INIT_LIST_HEAD(&ubd_dev->restart);
-	sg_init_table(ubd_dev->sg, MAX_SG);
+	ubd_dev->tag_set.ops = &ubd_mq_ops;
+	ubd_dev->tag_set.queue_depth = 64;
+	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
+	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+	ubd_dev->tag_set.driver_data = ubd_dev;
+	ubd_dev->tag_set.nr_hw_queues = 1;
 
-	err = -ENOMEM;
-	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
-	if (ubd_dev->queue == NULL) {
-		*error_out = "Failed to initialize device queue";
+	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
+	if (err)
 		goto out;
+
+	ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
+	if (IS_ERR(ubd_dev->queue)) {
+		err = PTR_ERR(ubd_dev->queue);
+		goto out_cleanup_tags;
 	}
+
 	ubd_dev->queue->queuedata = ubd_dev;
 	blk_queue_write_cache(ubd_dev->queue, true, false);
 
 	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
+	blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
 	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 	if(err){
 		*error_out = "Failed to register device";
-		goto out_cleanup;
+		goto out_cleanup_tags;
 	}
 
 	if (fake_major != UBD_MAJOR)
949970 out:
950971 return err;
951972
952
-out_cleanup:
953
- blk_cleanup_queue(ubd_dev->queue);
973
+out_cleanup_tags:
974
+ blk_mq_free_tag_set(&ubd_dev->tag_set);
975
+ if (!(IS_ERR(ubd_dev->queue)))
976
+ blk_cleanup_queue(ubd_dev->queue);
954977 goto out;
955978 }
956979
@@ -1235,11 +1258,11 @@
 			  __u64 bitmap_offset, unsigned long *bitmap_words,
 			  __u64 bitmap_len)
 {
-	__u64 sector = io_offset >> 9;
+	__u64 sector = io_offset >> SECTOR_SHIFT;
 	int i, update_bitmap = 0;
 
-	for(i = 0; i < length >> 9; i++){
+	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
 		if(cow_mask != NULL)
 			ubd_set_bit(i, (unsigned char *) cow_mask);
 		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
 			update_bitmap = 1;
12691292 *cow_offset += bitmap_offset;
12701293 }
12711294
1272
-static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1295
+static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1296
+ unsigned long offset, unsigned long *bitmap,
12731297 __u64 bitmap_offset, __u64 bitmap_len)
12741298 {
1275
- __u64 sector = req->offset >> 9;
1299
+ __u64 sector = offset >> SECTOR_SHIFT;
12761300 int i;
12771301
1278
- if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1302
+ if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
12791303 panic("Operation too long");
12801304
1281
- if(req->op == UBD_READ) {
1282
- for(i = 0; i < req->length >> 9; i++){
1305
+ if (req_op(req->req) == REQ_OP_READ) {
1306
+ for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
12831307 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
12841308 ubd_set_bit(i, (unsigned char *)
1285
- &req->sector_mask);
1309
+ &segment->sector_mask);
1310
+ }
1311
+ } else {
1312
+ cowify_bitmap(offset, segment->length, &segment->sector_mask,
1313
+ &segment->cow_offset, bitmap, bitmap_offset,
1314
+ segment->bitmap_words, bitmap_len);
1315
+ }
1316
+}
1317
+
1318
+static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1319
+ struct request *req)
1320
+{
1321
+ struct bio_vec bvec;
1322
+ struct req_iterator iter;
1323
+ int i = 0;
1324
+ unsigned long byte_offset = io_req->offset;
1325
+ int op = req_op(req);
1326
+
1327
+ if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1328
+ io_req->io_desc[0].buffer = NULL;
1329
+ io_req->io_desc[0].length = blk_rq_bytes(req);
1330
+ } else {
1331
+ rq_for_each_segment(bvec, req, iter) {
1332
+ BUG_ON(i >= io_req->desc_cnt);
1333
+
1334
+ io_req->io_desc[i].buffer =
1335
+ page_address(bvec.bv_page) + bvec.bv_offset;
1336
+ io_req->io_desc[i].length = bvec.bv_len;
1337
+ i++;
12861338 }
12871339 }
1288
- else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1289
- &req->cow_offset, bitmap, bitmap_offset,
1290
- req->bitmap_words, bitmap_len);
1291
-}
12921340
1293
-/* Called with dev->lock held */
1294
-static void prepare_request(struct request *req, struct io_thread_req *io_req,
1295
- unsigned long long offset, int page_offset,
1296
- int len, struct page *page)
1297
-{
1298
- struct gendisk *disk = req->rq_disk;
1299
- struct ubd *ubd_dev = disk->private_data;
1341
+ if (dev->cow.file) {
1342
+ for (i = 0; i < io_req->desc_cnt; i++) {
1343
+ cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1344
+ dev->cow.bitmap, dev->cow.bitmap_offset,
1345
+ dev->cow.bitmap_len);
1346
+ byte_offset += io_req->io_desc[i].length;
1347
+ }
13001348
1301
- io_req->req = req;
1302
- io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1303
- ubd_dev->fd;
1304
- io_req->fds[1] = ubd_dev->fd;
1305
- io_req->cow_offset = -1;
1306
- io_req->offset = offset;
1307
- io_req->length = len;
1308
- io_req->error = 0;
1309
- io_req->sector_mask = 0;
1310
-
1311
- io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1312
- io_req->offsets[0] = 0;
1313
- io_req->offsets[1] = ubd_dev->cow.data_offset;
1314
- io_req->buffer = page_address(page) + page_offset;
1315
- io_req->sectorsize = 1 << 9;
1316
-
1317
- if(ubd_dev->cow.file != NULL)
1318
- cowify_req(io_req, ubd_dev->cow.bitmap,
1319
- ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1320
-
1321
-}
1322
-
1323
-/* Called with dev->lock held */
1324
-static void prepare_flush_request(struct request *req,
1325
- struct io_thread_req *io_req)
1326
-{
1327
- struct gendisk *disk = req->rq_disk;
1328
- struct ubd *ubd_dev = disk->private_data;
1329
-
1330
- io_req->req = req;
1331
- io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1332
- ubd_dev->fd;
1333
- io_req->op = UBD_FLUSH;
1334
-}
1335
-
1336
-static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1337
-{
1338
- int n = os_write_file(thread_fd, &io_req,
1339
- sizeof(io_req));
1340
- if (n != sizeof(io_req)) {
1341
- if (n != -EAGAIN)
1342
- printk("write to io thread failed, "
1343
- "errno = %d\n", -n);
1344
- else if (list_empty(&dev->restart))
1345
- list_add(&dev->restart, &restart);
1346
-
1347
- kfree(io_req);
1348
- return false;
13491349 }
1350
- return true;
13511350 }
13521351
1353
-/* Called with dev->lock held */
1354
-static void do_ubd_request(struct request_queue *q)
1352
+static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1353
+ int desc_cnt)
13551354 {
13561355 struct io_thread_req *io_req;
1357
- struct request *req;
1356
+ int i;
13581357
1359
- while(1){
1360
- struct ubd *dev = q->queuedata;
1361
- if(dev->request == NULL){
1362
- struct request *req = blk_fetch_request(q);
1363
- if(req == NULL)
1364
- return;
1358
+ io_req = kmalloc(sizeof(*io_req) +
1359
+ (desc_cnt * sizeof(struct io_desc)),
1360
+ GFP_ATOMIC);
1361
+ if (!io_req)
1362
+ return NULL;
13651363
1366
- dev->request = req;
1367
- dev->rq_pos = blk_rq_pos(req);
1368
- dev->start_sg = 0;
1369
- dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1370
- }
1364
+ io_req->req = req;
1365
+ if (dev->cow.file)
1366
+ io_req->fds[0] = dev->cow.fd;
1367
+ else
1368
+ io_req->fds[0] = dev->fd;
1369
+ io_req->error = 0;
1370
+ io_req->sectorsize = SECTOR_SIZE;
1371
+ io_req->fds[1] = dev->fd;
1372
+ io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1373
+ io_req->offsets[0] = 0;
1374
+ io_req->offsets[1] = dev->cow.data_offset;
13711375
1372
- req = dev->request;
1373
-
1374
- if (req_op(req) == REQ_OP_FLUSH) {
1375
- io_req = kmalloc(sizeof(struct io_thread_req),
1376
- GFP_ATOMIC);
1377
- if (io_req == NULL) {
1378
- if (list_empty(&dev->restart))
1379
- list_add(&dev->restart, &restart);
1380
- return;
1381
- }
1382
- prepare_flush_request(req, io_req);
1383
- if (submit_request(io_req, dev) == false)
1384
- return;
1385
- }
1386
-
1387
- while(dev->start_sg < dev->end_sg){
1388
- struct scatterlist *sg = &dev->sg[dev->start_sg];
1389
-
1390
- io_req = kmalloc(sizeof(struct io_thread_req),
1391
- GFP_ATOMIC);
1392
- if(io_req == NULL){
1393
- if(list_empty(&dev->restart))
1394
- list_add(&dev->restart, &restart);
1395
- return;
1396
- }
1397
- prepare_request(req, io_req,
1398
- (unsigned long long)dev->rq_pos << 9,
1399
- sg->offset, sg->length, sg_page(sg));
1400
-
1401
- if (submit_request(io_req, dev) == false)
1402
- return;
1403
-
1404
- dev->rq_pos += sg->length >> 9;
1405
- dev->start_sg++;
1406
- }
1407
- dev->end_sg = 0;
1408
- dev->request = NULL;
1376
+ for (i = 0 ; i < desc_cnt; i++) {
1377
+ io_req->io_desc[i].sector_mask = 0;
1378
+ io_req->io_desc[i].cow_offset = -1;
14091379 }
1380
+
1381
+ return io_req;
1382
+}
1383
+
1384
+static int ubd_submit_request(struct ubd *dev, struct request *req)
1385
+{
1386
+ int segs = 0;
1387
+ struct io_thread_req *io_req;
1388
+ int ret;
1389
+ int op = req_op(req);
1390
+
1391
+ if (op == REQ_OP_FLUSH)
1392
+ segs = 0;
1393
+ else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1394
+ segs = 1;
1395
+ else
1396
+ segs = blk_rq_nr_phys_segments(req);
1397
+
1398
+ io_req = ubd_alloc_req(dev, req, segs);
1399
+ if (!io_req)
1400
+ return -ENOMEM;
1401
+
1402
+ io_req->desc_cnt = segs;
1403
+ if (segs)
1404
+ ubd_map_req(dev, io_req, req);
1405
+
1406
+ ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1407
+ if (ret != sizeof(io_req)) {
1408
+ if (ret != -EAGAIN)
1409
+ pr_err("write to io thread failed: %d\n", -ret);
1410
+ kfree(io_req);
1411
+ }
1412
+ return ret;
1413
+}
1414
+
1415
+static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1416
+ const struct blk_mq_queue_data *bd)
1417
+{
1418
+ struct ubd *ubd_dev = hctx->queue->queuedata;
1419
+ struct request *req = bd->rq;
1420
+ int ret = 0, res = BLK_STS_OK;
1421
+
1422
+ blk_mq_start_request(req);
1423
+
1424
+ spin_lock_irq(&ubd_dev->lock);
1425
+
1426
+ switch (req_op(req)) {
1427
+ case REQ_OP_FLUSH:
1428
+ case REQ_OP_READ:
1429
+ case REQ_OP_WRITE:
1430
+ case REQ_OP_DISCARD:
1431
+ case REQ_OP_WRITE_ZEROES:
1432
+ ret = ubd_submit_request(ubd_dev, req);
1433
+ break;
1434
+ default:
1435
+ WARN_ON_ONCE(1);
1436
+ res = BLK_STS_NOTSUPP;
1437
+ }
1438
+
1439
+ spin_unlock_irq(&ubd_dev->lock);
1440
+
1441
+ if (ret < 0) {
1442
+ if (ret == -ENOMEM)
1443
+ res = BLK_STS_RESOURCE;
1444
+ else
1445
+ res = BLK_STS_DEV_RESOURCE;
1446
+ }
1447
+
1448
+ return res;
14101449 }
14111450
14121451 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
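The allocation pattern behind ubd_alloc_req(), one kmalloc() covering a fixed header plus a variable number of trailing struct io_desc entries through a C99 flexible array member, in a minimal userspace sketch (names are illustrative, not kernel code):

	#include <stdlib.h>
	#include <string.h>

	struct eg_desc {
		char *buffer;
		unsigned long length;
	};

	struct eg_req {
		int desc_cnt;
		struct eg_desc io_desc[];	/* must stay the last member */
	};

	static struct eg_req *eg_alloc_req(int desc_cnt)
	{
		/* One allocation covers the header and all descriptors,
		 * mirroring the kmalloc() in ubd_alloc_req(). */
		size_t sz = sizeof(struct eg_req) +
			    desc_cnt * sizeof(struct eg_desc);
		struct eg_req *r = malloc(sz);

		if (r) {
			memset(r, 0, sz);
			r->desc_cnt = desc_cnt;
		}
		return r;
	}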
@@ -1451,85 +1490,118 @@
 	return -EINVAL;
 }
 
-static int update_bitmap(struct io_thread_req *req)
+static int map_error(int error_code)
+{
+	switch (error_code) {
+	case 0:
+		return BLK_STS_OK;
+	case ENOSYS:
+	case EOPNOTSUPP:
+		return BLK_STS_NOTSUPP;
+	case ENOSPC:
+		return BLK_STS_NOSPC;
+	}
+	return BLK_STS_IOERR;
+}
+
+/*
+ * Everything from here onwards *IS NOT PART OF THE KERNEL*
+ *
+ * The following functions are part of UML hypervisor code.
+ * All functions from here onwards are executed as a helper
+ * thread and are not allowed to execute any kernel functions.
+ *
+ * Any communication must occur strictly via shared memory and IPC.
+ *
+ * Do not add printks, locks, kernel memory operations, etc - it
+ * will result in unpredictable behaviour and/or crashes.
+ */
+
+static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
 {
 	int n;
 
-	if(req->cow_offset == -1)
-		return 0;
+	if (segment->cow_offset == -1)
+		return map_error(0);
 
-	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
-			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words)){
-		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
-			req->fds[1]);
-		return 1;
-	}
+	n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
+			   sizeof(segment->bitmap_words), segment->cow_offset);
+	if (n != sizeof(segment->bitmap_words))
+		return map_error(-n);
 
-	return 0;
+	return map_error(0);
 }
 
-static void do_io(struct io_thread_req *req)
+static void do_io(struct io_thread_req *req, struct io_desc *desc)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
-	if (req->op == UBD_FLUSH) {
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
+	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
-		n = os_sync_file(req->fds[0]);
-		if (n != 0) {
-			printk("do_io - sync failed err = %d "
-			       "fd = %d\n", -n, req->fds[0]);
-			req->error = 1;
-		}
+		req->error = map_error(-os_sync_file(req->fds[0]));
 		return;
 	}
 
-	nsectors = req->length / req->sectorsize;
+	nsectors = desc->length / req->sectorsize;
 	start = 0;
 	do {
-		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
+		bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
 		end = start;
 		while((end < nsectors) &&
-		      (ubd_test_bit(end, (unsigned char *)
-				    &req->sector_mask) == bit))
+		      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
 			end++;
 
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (desc->buffer != NULL)
+			buf = &desc->buffer[start * req->sectorsize];
 
-		if(req->op == UBD_READ){
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
 				if (n < 0) {
-					printk("do_io - read failed, err = %d "
-					       "fd = %d\n", -n, req->fds[bit]);
-					req->error = 1;
+					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
-				printk("do_io - write failed err = %d "
-				       "fd = %d\n", -n, req->fds[bit]);
-				req->error = 1;
+				req->error = map_error(-n);
 				return;
 			}
+			break;
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
 	} while(start < nsectors);
 
-	req->error = update_bitmap(req);
+	req->offset += len;
+	req->error = update_bitmap(req, desc);
 }
 
 /* Changed in start_io_thread, which is serialized by being called only
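os_falloc_punch() lives in UML's os-Linux layer and is not shown in this diff. Assuming it wraps fallocate(2) hole punching, the usual way to implement discard/write-zeroes on a plain backing file, a minimal userspace equivalent would be:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <errno.h>

	/* Sketch of the presumed os_falloc_punch() behaviour: deallocate
	 * the range so it reads back as zeroes; file size is unchanged. */
	static int eg_falloc_punch(int fd, long long offset, long long len)
	{
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      offset, len) < 0)
			return -errno;
		return 0;
	}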
@@ -1554,21 +1626,21 @@
 			&io_remainder_size,
 			UBD_REQ_BUFFER_SIZE
 		);
-		if (n < 0) {
-			if (n == -EAGAIN) {
+		if (n <= 0) {
+			if (n == -EAGAIN)
 				ubd_read_poll(-1);
-				continue;
-			} else {
-				printk("io_thread - read failed, fd = %d, "
-				       "err = %d,"
-				       "reminder = %d\n",
-				       kernel_fd, -n, io_remainder_size);
-			}
+
+			continue;
 		}
 
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
+			struct io_thread_req *req = (*io_req_buffer)[count];
+			int i;
+
 			io_count++;
-			do_io((*io_req_buffer)[count]);
+			for (i = 0; !req->error && i < req->desc_cnt; i++)
+				do_io(req, &(req->io_desc[i]));
+
 		}
 
 		written = 0;
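The protocol this loop serves: the kernel side writes raw struct io_thread_req pointers down a file descriptor, the helper thread does the I/O, then writes the same pointers back so ubd_handler() can complete the requests. A stripped-down userspace sketch of that pointer-passing handshake (illustrative; the real code uses os_read_file()/os_write_file() plus bulk_req_safe_read() for partial reads):

	#include <unistd.h>

	struct io_thread_req;	/* opaque in this sketch */

	static void eg_io_loop(int fd)
	{
		struct io_thread_req *req;

		/* Pointers travel by value; ownership follows them. */
		while (read(fd, &req, sizeof(req)) == sizeof(req)) {
			/* ... perform the I/O described by *req ... */
			if (write(fd, &req, sizeof(req)) != sizeof(req))
				break;	/* completion pointer lost */
		}
	}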
@@ -1579,11 +1651,6 @@
 					    n - written);
 			if (res >= 0) {
 				written += res;
-			} else {
-				if (res != -EAGAIN) {
-					printk("io_thread - write failed, fd = %d, "
-					       "err = %d\n", kernel_fd, -n);
-				}
 			}
 			if (written < n) {
 				ubd_write_poll(-1);