2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/virtio/virtio_ring.c
@@ -1,20 +1,7 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /* Virtio ring implementation.
23 *
34 * Copyright 2007 Rusty Russell IBM Corporation
4
- *
5
- * This program is free software; you can redistribute it and/or modify
6
- * it under the terms of the GNU General Public License as published by
7
- * the Free Software Foundation; either version 2 of the License, or
8
- * (at your option) any later version.
9
- *
10
- * This program is distributed in the hope that it will be useful,
11
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- * GNU General Public License for more details.
14
- *
15
- * You should have received a copy of the GNU General Public License
16
- * along with this program; if not, write to the Free Software
17
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
185 */
196 #include <linux/virtio.h>
207 #include <linux/virtio_ring.h>
@@ -44,6 +31,26 @@
4431 } while (0)
4532 #define END_USE(_vq) \
4633 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
34
+#define LAST_ADD_TIME_UPDATE(_vq) \
35
+ do { \
36
+ ktime_t now = ktime_get(); \
37
+ \
38
+ /* No kick or get, with .1 second between? Warn. */ \
39
+ if ((_vq)->last_add_time_valid) \
40
+ WARN_ON(ktime_to_ms(ktime_sub(now, \
41
+ (_vq)->last_add_time)) > 100); \
42
+ (_vq)->last_add_time = now; \
43
+ (_vq)->last_add_time_valid = true; \
44
+ } while (0)
45
+#define LAST_ADD_TIME_CHECK(_vq) \
46
+ do { \
47
+ if ((_vq)->last_add_time_valid) { \
48
+ WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
49
+ (_vq)->last_add_time)) > 100); \
50
+ } \
51
+ } while (0)
52
+#define LAST_ADD_TIME_INVALID(_vq) \
53
+ ((_vq)->last_add_time_valid = false)
4754 #else
4855 #define BAD_RING(_vq, fmt, args...) \
4956 do { \
@@ -53,18 +60,38 @@
5360 } while (0)
5461 #define START_USE(vq)
5562 #define END_USE(vq)
63
+#define LAST_ADD_TIME_UPDATE(vq)
64
+#define LAST_ADD_TIME_CHECK(vq)
65
+#define LAST_ADD_TIME_INVALID(vq)
5666 #endif
5767
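Editor's note: the LAST_ADD_TIME_* helpers added above exist only in DEBUG builds; in the #else branch they expand to nothing, so the call sites in the add/kick/get paths stay unconditional. Below is a minimal userspace sketch of the same 100 ms rule, assuming a plain CLOCK_MONOTONIC timestamp instead of ktime; all names are illustrative, not the kernel's.

#define _POSIX_C_SOURCE 200809L
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static bool last_valid;
static int64_t last_ms;

static int64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}

/* Analogue of LAST_ADD_TIME_UPDATE: warn if >100 ms since the last add. */
static void last_add_time_update(void)
{
	int64_t now = now_ms();

	if (last_valid && now - last_ms > 100)
		fprintf(stderr, "more than 100 ms between add and kick/get\n");
	last_ms = now;
	last_valid = true;
}

/* Analogue of LAST_ADD_TIME_INVALID: forget the stored timestamp. */
static void last_add_time_invalid(void)
{
	last_valid = false;
}

int main(void)
{
	struct timespec delay = { 0, 120 * 1000 * 1000 };

	last_add_time_update();    /* first call only records a timestamp */
	nanosleep(&delay, NULL);   /* simulate a 120 ms stall */
	last_add_time_update();    /* this one prints the warning */
	last_add_time_invalid();
	return 0;
}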
58
-struct vring_desc_state {
68
+struct vring_desc_state_split {
5969 void *data; /* Data for callback. */
6070 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
71
+};
72
+
73
+struct vring_desc_state_packed {
74
+ void *data; /* Data for callback. */
75
+ struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
76
+ u16 num; /* Descriptor list length. */
77
+ u16 next; /* The next desc state in a list. */
78
+ u16 last; /* The last desc state in a list. */
79
+};
80
+
81
+struct vring_desc_extra_packed {
82
+ dma_addr_t addr; /* Buffer DMA addr. */
83
+ u32 len; /* Buffer length. */
84
+ u16 flags; /* Descriptor flags. */
6185 };
6286
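Editor's note: packed-ring descriptors have no next field of their own, so the driver threads its free list through desc_state[].next, with vq->free_head pointing at the first free id and the detach path pushing ids back. A small self-contained sketch of that array-backed free list follows; names are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define DEMO_QUEUE_SIZE 8

static uint16_t next[DEMO_QUEUE_SIZE];   /* stands in for desc_state[i].next */
static uint16_t free_head;

static void init_free_list(void)
{
	for (uint16_t i = 0; i + 1 < DEMO_QUEUE_SIZE; i++)
		next[i] = i + 1;
	free_head = 0;
}

/* Pop one id, as the packed add path does with vq->free_head. */
static uint16_t take_id(void)
{
	uint16_t id = free_head;

	free_head = next[id];
	return id;
}

/* Push one id back; the real detach path relinks a whole chain at once. */
static void give_back_id(uint16_t id)
{
	next[id] = free_head;
	free_head = id;
}

int main(void)
{
	init_free_list();
	uint16_t a = take_id(), b = take_id();

	give_back_id(a);
	printf("%u %u %u\n", (unsigned)a, (unsigned)b, (unsigned)take_id());
	/* prints: 0 1 0 */
	return 0;
}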
6387 struct vring_virtqueue {
6488 struct virtqueue vq;
6589
66
- /* Actual memory layout for this queue */
67
- struct vring vring;
90
+ /* Is this a packed ring? */
91
+ bool packed_ring;
92
+
93
+ /* Is DMA API used? */
94
+ bool use_dma_api;
6895
6996 /* Can we use weak barriers? */
7097 bool weak_barriers;
@@ -86,19 +113,75 @@
86113 /* Last used index we've seen. */
87114 u16 last_used_idx;
88115
89
- /* Last written value to avail->flags */
90
- u16 avail_flags_shadow;
116
+ union {
117
+ /* Available for split ring */
118
+ struct {
119
+ /* Actual memory layout for this queue. */
120
+ struct vring vring;
91121
92
- /* Last written value to avail->idx in guest byte order */
93
- u16 avail_idx_shadow;
122
+ /* Last written value to avail->flags */
123
+ u16 avail_flags_shadow;
124
+
125
+ /*
126
+ * Last written value to avail->idx in
127
+ * guest byte order.
128
+ */
129
+ u16 avail_idx_shadow;
130
+
131
+ /* Per-descriptor state. */
132
+ struct vring_desc_state_split *desc_state;
133
+
134
+ /* DMA address and size information */
135
+ dma_addr_t queue_dma_addr;
136
+ size_t queue_size_in_bytes;
137
+ } split;
138
+
139
+ /* Available for packed ring */
140
+ struct {
141
+ /* Actual memory layout for this queue. */
142
+ struct {
143
+ unsigned int num;
144
+ struct vring_packed_desc *desc;
145
+ struct vring_packed_desc_event *driver;
146
+ struct vring_packed_desc_event *device;
147
+ } vring;
148
+
149
+ /* Driver ring wrap counter. */
150
+ bool avail_wrap_counter;
151
+
152
+ /* Device ring wrap counter. */
153
+ bool used_wrap_counter;
154
+
155
+ /* Avail used flags. */
156
+ u16 avail_used_flags;
157
+
158
+ /* Index of the next avail descriptor. */
159
+ u16 next_avail_idx;
160
+
161
+ /*
162
+ * Last written value to driver->flags in
163
+ * guest byte order.
164
+ */
165
+ u16 event_flags_shadow;
166
+
167
+ /* Per-descriptor state. */
168
+ struct vring_desc_state_packed *desc_state;
169
+ struct vring_desc_extra_packed *desc_extra;
170
+
171
+ /* DMA address and size information */
172
+ dma_addr_t ring_dma_addr;
173
+ dma_addr_t driver_event_dma_addr;
174
+ dma_addr_t device_event_dma_addr;
175
+ size_t ring_size_in_bytes;
176
+ size_t event_size_in_bytes;
177
+ } packed;
178
+ };
94179
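Editor's note: the struct above now keeps the two mutually exclusive ring layouts in a single anonymous union, with packed_ring as the tag that says which member is live. A minimal sketch of that tagged-union pattern (C11 anonymous union; all names here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_vq {
	bool packed_ring;   /* the tag: which union member is live */
	union {
		struct {
			uint16_t avail_idx_shadow;
		} split;
		struct {
			uint16_t next_avail_idx;
			bool avail_wrap_counter;
		} packed;
	};
};

/* Every access first checks the tag and never touches the other member. */
static unsigned int demo_next_slot(const struct demo_vq *vq)
{
	return vq->packed_ring ? vq->packed.next_avail_idx
			       : vq->split.avail_idx_shadow;
}

int main(void)
{
	struct demo_vq vq = {
		.packed_ring = true,
		.packed = { .next_avail_idx = 3, .avail_wrap_counter = true },
	};

	printf("next slot: %u\n", demo_next_slot(&vq));   /* prints: 3 */
	return 0;
}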
95180 /* How to notify other side. FIXME: commonalize hcalls! */
96181 bool (*notify)(struct virtqueue *vq);
97182
98183 /* DMA, allocation, and size information */
99184 bool we_own_ring;
100
- size_t queue_size_in_bytes;
101
- dma_addr_t queue_dma_addr;
102185
103186 #ifdef DEBUG
104187 /* They're supposed to lock for us. */
@@ -108,12 +191,26 @@
108191 bool last_add_time_valid;
109192 ktime_t last_add_time;
110193 #endif
111
-
112
- /* Per-descriptor state. */
113
- struct vring_desc_state desc_state[];
114194 };
115195
196
+
197
+/*
198
+ * Helpers.
199
+ */
200
+
116201 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
202
+
203
+static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
204
+ unsigned int total_sg)
205
+{
206
+ struct vring_virtqueue *vq = to_vvq(_vq);
207
+
208
+ /*
209
+ * If the host supports indirect descriptor tables, and we have multiple
210
+ * buffers, then go indirect. FIXME: tune this threshold
211
+ */
212
+ return (vq->indirect && total_sg > 1 && vq->vq.num_free);
213
+}
117214
118215 /*
119216 * Modern virtio devices have feature bits to specify whether they need a
@@ -143,7 +240,7 @@
143240
144241 static bool vring_use_dma_api(struct virtio_device *vdev)
145242 {
146
- if (!virtio_has_iommu_quirk(vdev))
243
+ if (!virtio_has_dma_quirk(vdev))
147244 return true;
148245
149246 /* Otherwise, we are left to guess. */
@@ -161,6 +258,59 @@
161258 return false;
162259 }
163260
261
+size_t virtio_max_dma_size(struct virtio_device *vdev)
262
+{
263
+ size_t max_segment_size = SIZE_MAX;
264
+
265
+ if (vring_use_dma_api(vdev))
266
+ max_segment_size = dma_max_mapping_size(vdev->dev.parent);
267
+
268
+ return max_segment_size;
269
+}
270
+EXPORT_SYMBOL_GPL(virtio_max_dma_size);
271
+
272
+static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
273
+ dma_addr_t *dma_handle, gfp_t flag)
274
+{
275
+ if (vring_use_dma_api(vdev)) {
276
+ return dma_alloc_coherent(vdev->dev.parent, size,
277
+ dma_handle, flag);
278
+ } else {
279
+ void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
280
+
281
+ if (queue) {
282
+ phys_addr_t phys_addr = virt_to_phys(queue);
283
+ *dma_handle = (dma_addr_t)phys_addr;
284
+
285
+ /*
286
+			 * Sanity check: make sure we didn't truncate
287
+ * the address. The only arches I can find that
288
+ * have 64-bit phys_addr_t but 32-bit dma_addr_t
289
+ * are certain non-highmem MIPS and x86
290
+ * configurations, but these configurations
291
+ * should never allocate physical pages above 32
292
+ * bits, so this is fine. Just in case, throw a
293
+ * warning and abort if we end up with an
294
+ * unrepresentable address.
295
+ */
296
+ if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
297
+ free_pages_exact(queue, PAGE_ALIGN(size));
298
+ return NULL;
299
+ }
300
+ }
301
+ return queue;
302
+ }
303
+}
304
+
305
+static void vring_free_queue(struct virtio_device *vdev, size_t size,
306
+ void *queue, dma_addr_t dma_handle)
307
+{
308
+ if (vring_use_dma_api(vdev))
309
+ dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
310
+ else
311
+ free_pages_exact(queue, PAGE_ALIGN(size));
312
+}
313
+
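Editor's note: vring_alloc_queue() above falls back to alloc_pages_exact() when the DMA API is not in use, and the WARN_ON_ONCE() guards against a physical address that does not fit in dma_addr_t. A tiny standalone model of that truncation check, using deliberately mismatched 32/64-bit typedefs (illustrative names only):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t demo_dma_addr_t;    /* pretend dma_addr_t is 32-bit...  */
typedef uint64_t demo_phys_addr_t;   /* ...while phys_addr_t is 64-bit   */

static int store_handle(demo_phys_addr_t phys, demo_dma_addr_t *handle)
{
	*handle = (demo_dma_addr_t)phys;
	if (*handle != phys)    /* same comparison as the WARN_ON_ONCE() above */
		return -1;      /* caller would free the pages and bail out   */
	return 0;
}

int main(void)
{
	demo_dma_addr_t handle;

	printf("%d\n", store_handle(0x00000000c0000000ULL, &handle)); /* 0  */
	printf("%d\n", store_handle(0x0000000100000000ULL, &handle)); /* -1 */
	return 0;
}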
164314 /*
165315 * The DMA ops on various arches are rather gnarly right now, and
166316 * making all of the arch DMA ops work on the vring device itself
@@ -176,7 +326,7 @@
176326 struct scatterlist *sg,
177327 enum dma_data_direction direction)
178328 {
179
- if (!vring_use_dma_api(vq->vq.vdev))
329
+ if (!vq->use_dma_api)
180330 return (dma_addr_t)sg_phys(sg);
181331
182332 /*
@@ -193,19 +343,33 @@
193343 void *cpu_addr, size_t size,
194344 enum dma_data_direction direction)
195345 {
196
- if (!vring_use_dma_api(vq->vq.vdev))
346
+ if (!vq->use_dma_api)
197347 return (dma_addr_t)virt_to_phys(cpu_addr);
198348
199349 return dma_map_single(vring_dma_dev(vq),
200350 cpu_addr, size, direction);
201351 }
202352
203
-static void vring_unmap_one(const struct vring_virtqueue *vq,
204
- struct vring_desc *desc)
353
+static int vring_mapping_error(const struct vring_virtqueue *vq,
354
+ dma_addr_t addr)
355
+{
356
+ if (!vq->use_dma_api)
357
+ return 0;
358
+
359
+ return dma_mapping_error(vring_dma_dev(vq), addr);
360
+}
361
+
362
+
363
+/*
364
+ * Split ring specific functions - *_split().
365
+ */
366
+
367
+static void vring_unmap_one_split(const struct vring_virtqueue *vq,
368
+ struct vring_desc *desc)
205369 {
206370 u16 flags;
207371
208
- if (!vring_use_dma_api(vq->vq.vdev))
372
+ if (!vq->use_dma_api)
209373 return;
210374
211375 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
@@ -225,17 +389,9 @@
225389 }
226390 }
227391
228
-static int vring_mapping_error(const struct vring_virtqueue *vq,
229
- dma_addr_t addr)
230
-{
231
- if (!vring_use_dma_api(vq->vq.vdev))
232
- return 0;
233
-
234
- return dma_mapping_error(vring_dma_dev(vq), addr);
235
-}
236
-
237
-static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
238
- unsigned int total_sg, gfp_t gfp)
392
+static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
393
+ unsigned int total_sg,
394
+ gfp_t gfp)
239395 {
240396 struct vring_desc *desc;
241397 unsigned int i;
@@ -256,19 +412,19 @@
256412 return desc;
257413 }
258414
259
-static inline int virtqueue_add(struct virtqueue *_vq,
260
- struct scatterlist *sgs[],
261
- unsigned int total_sg,
262
- unsigned int out_sgs,
263
- unsigned int in_sgs,
264
- void *data,
265
- void *ctx,
266
- gfp_t gfp)
415
+static inline int virtqueue_add_split(struct virtqueue *_vq,
416
+ struct scatterlist *sgs[],
417
+ unsigned int total_sg,
418
+ unsigned int out_sgs,
419
+ unsigned int in_sgs,
420
+ void *data,
421
+ void *ctx,
422
+ gfp_t gfp)
267423 {
268424 struct vring_virtqueue *vq = to_vvq(_vq);
269425 struct scatterlist *sg;
270426 struct vring_desc *desc;
271
- unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
427
+ unsigned int i, n, avail, descs_used, prev, err_idx;
272428 int head;
273429 bool indirect;
274430
@@ -282,30 +438,17 @@
282438 return -EIO;
283439 }
284440
285
-#ifdef DEBUG
286
- {
287
- ktime_t now = ktime_get();
288
-
289
- /* No kick or get, with .1 second between? Warn. */
290
- if (vq->last_add_time_valid)
291
- WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
292
- > 100);
293
- vq->last_add_time = now;
294
- vq->last_add_time_valid = true;
295
- }
296
-#endif
441
+ LAST_ADD_TIME_UPDATE(vq);
297442
298443 BUG_ON(total_sg == 0);
299444
300445 head = vq->free_head;
301446
302
- /* If the host supports indirect descriptor tables, and we have multiple
303
- * buffers, then go indirect. FIXME: tune this threshold */
304
- if (vq->indirect && total_sg > 1 && vq->vq.num_free)
305
- desc = alloc_indirect(_vq, total_sg, gfp);
447
+ if (virtqueue_use_indirect(_vq, total_sg))
448
+ desc = alloc_indirect_split(_vq, total_sg, gfp);
306449 else {
307450 desc = NULL;
308
- WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
451
+ WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
309452 }
310453
311454 if (desc) {
....@@ -316,7 +459,7 @@
316459 descs_used = 1;
317460 } else {
318461 indirect = false;
319
- desc = vq->vring.desc;
462
+ desc = vq->split.vring.desc;
320463 i = head;
321464 descs_used = total_sg;
322465 }
@@ -372,10 +515,13 @@
372515 if (vring_mapping_error(vq, addr))
373516 goto unmap_release;
374517
375
- vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
376
- vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
518
+ vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
519
+ VRING_DESC_F_INDIRECT);
520
+ vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
521
+ addr);
377522
378
- vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
523
+ vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
524
+ total_sg * sizeof(struct vring_desc));
379525 }
380526
381527 /* We're using some buffers from the free list. */
@@ -383,27 +529,29 @@
383529
384530 /* Update free pointer */
385531 if (indirect)
386
- vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
532
+ vq->free_head = virtio16_to_cpu(_vq->vdev,
533
+ vq->split.vring.desc[head].next);
387534 else
388535 vq->free_head = i;
389536
390537 /* Store token and indirect buffer state. */
391
- vq->desc_state[head].data = data;
538
+ vq->split.desc_state[head].data = data;
392539 if (indirect)
393
- vq->desc_state[head].indir_desc = desc;
540
+ vq->split.desc_state[head].indir_desc = desc;
394541 else
395
- vq->desc_state[head].indir_desc = ctx;
542
+ vq->split.desc_state[head].indir_desc = ctx;
396543
397544 /* Put entry in available array (but don't update avail->idx until they
398545 * do sync). */
399
- avail = vq->avail_idx_shadow & (vq->vring.num - 1);
400
- vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
546
+ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
547
+ vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
401548
402549 /* Descriptors and available array need to be set before we expose the
403550 * new available array entries. */
404551 virtio_wmb(vq->weak_barriers);
405
- vq->avail_idx_shadow++;
406
- vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
552
+ vq->split.avail_idx_shadow++;
553
+ vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
554
+ vq->split.avail_idx_shadow);
407555 vq->num_added++;
408556
409557 pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -418,13 +566,17 @@
418566
419567 unmap_release:
420568 err_idx = i;
421
- i = head;
569
+
570
+ if (indirect)
571
+ i = 0;
572
+ else
573
+ i = head;
422574
423575 for (n = 0; n < total_sg; n++) {
424576 if (i == err_idx)
425577 break;
426
- vring_unmap_one(vq, &desc[i]);
427
- i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
578
+ vring_unmap_one_split(vq, &desc[i]);
579
+ i = virtio16_to_cpu(_vq->vdev, desc[i].next);
428580 }
429581
430582 if (indirect)
@@ -434,12 +586,1143 @@
434586 return -ENOMEM;
435587 }
436588
589
+static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
590
+{
591
+ struct vring_virtqueue *vq = to_vvq(_vq);
592
+ u16 new, old;
593
+ bool needs_kick;
594
+
595
+ START_USE(vq);
596
+ /* We need to expose available array entries before checking avail
597
+ * event. */
598
+ virtio_mb(vq->weak_barriers);
599
+
600
+ old = vq->split.avail_idx_shadow - vq->num_added;
601
+ new = vq->split.avail_idx_shadow;
602
+ vq->num_added = 0;
603
+
604
+ LAST_ADD_TIME_CHECK(vq);
605
+ LAST_ADD_TIME_INVALID(vq);
606
+
607
+ if (vq->event) {
608
+ needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
609
+ vring_avail_event(&vq->split.vring)),
610
+ new, old);
611
+ } else {
612
+ needs_kick = !(vq->split.vring.used->flags &
613
+ cpu_to_virtio16(_vq->vdev,
614
+ VRING_USED_F_NO_NOTIFY));
615
+ }
616
+ END_USE(vq);
617
+ return needs_kick;
618
+}
619
+
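Editor's note: when VIRTIO_RING_F_EVENT_IDX is negotiated (vq->event), virtqueue_kick_prepare_split() above defers to vring_need_event() from include/uapi/linux/virtio_ring.h: kick only if the index the device asked to be woken at lies in the window of entries just published. A standalone sketch of that check with the same mod-2^16 arithmetic:

#include <stdint.h>
#include <stdio.h>

/* True if the device asked to be notified somewhere in (old, new]. */
static int need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
{
	/* All arithmetic is mod 2^16, so index wrap-around needs no care. */
	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx);
}

int main(void)
{
	printf("%d\n", need_event(5, 8, 3));          /* 1: 5 lies in (3, 8]  */
	printf("%d\n", need_event(9, 8, 3));          /* 0: not reached yet   */
	printf("%d\n", need_event(65534, 2, 65530));  /* 1: works across wrap */
	return 0;
}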
620
+static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
621
+ void **ctx)
622
+{
623
+ unsigned int i, j;
624
+ __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
625
+
626
+ /* Clear data ptr. */
627
+ vq->split.desc_state[head].data = NULL;
628
+
629
+ /* Put back on free list: unmap first-level descriptors and find end */
630
+ i = head;
631
+
632
+ while (vq->split.vring.desc[i].flags & nextflag) {
633
+ vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
634
+ i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
635
+ vq->vq.num_free++;
636
+ }
637
+
638
+ vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
639
+ vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
640
+ vq->free_head);
641
+ vq->free_head = head;
642
+
643
+ /* Plus final descriptor */
644
+ vq->vq.num_free++;
645
+
646
+ if (vq->indirect) {
647
+ struct vring_desc *indir_desc =
648
+ vq->split.desc_state[head].indir_desc;
649
+ u32 len;
650
+
651
+ /* Free the indirect table, if any, now that it's unmapped. */
652
+ if (!indir_desc)
653
+ return;
654
+
655
+ len = virtio32_to_cpu(vq->vq.vdev,
656
+ vq->split.vring.desc[head].len);
657
+
658
+ BUG_ON(!(vq->split.vring.desc[head].flags &
659
+ cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
660
+ BUG_ON(len == 0 || len % sizeof(struct vring_desc));
661
+
662
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
663
+ vring_unmap_one_split(vq, &indir_desc[j]);
664
+
665
+ kfree(indir_desc);
666
+ vq->split.desc_state[head].indir_desc = NULL;
667
+ } else if (ctx) {
668
+ *ctx = vq->split.desc_state[head].indir_desc;
669
+ }
670
+}
671
+
672
+static inline bool more_used_split(const struct vring_virtqueue *vq)
673
+{
674
+ return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
675
+ vq->split.vring.used->idx);
676
+}
677
+
678
+static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
679
+ unsigned int *len,
680
+ void **ctx)
681
+{
682
+ struct vring_virtqueue *vq = to_vvq(_vq);
683
+ void *ret;
684
+ unsigned int i;
685
+ u16 last_used;
686
+
687
+ START_USE(vq);
688
+
689
+ if (unlikely(vq->broken)) {
690
+ END_USE(vq);
691
+ return NULL;
692
+ }
693
+
694
+ if (!more_used_split(vq)) {
695
+ pr_debug("No more buffers in queue\n");
696
+ END_USE(vq);
697
+ return NULL;
698
+ }
699
+
700
+ /* Only get used array entries after they have been exposed by host. */
701
+ virtio_rmb(vq->weak_barriers);
702
+
703
+ last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
704
+ i = virtio32_to_cpu(_vq->vdev,
705
+ vq->split.vring.used->ring[last_used].id);
706
+ *len = virtio32_to_cpu(_vq->vdev,
707
+ vq->split.vring.used->ring[last_used].len);
708
+
709
+ if (unlikely(i >= vq->split.vring.num)) {
710
+ BAD_RING(vq, "id %u out of range\n", i);
711
+ return NULL;
712
+ }
713
+ if (unlikely(!vq->split.desc_state[i].data)) {
714
+ BAD_RING(vq, "id %u is not a head!\n", i);
715
+ return NULL;
716
+ }
717
+
718
+ /* detach_buf_split clears data, so grab it now. */
719
+ ret = vq->split.desc_state[i].data;
720
+ detach_buf_split(vq, i, ctx);
721
+ vq->last_used_idx++;
722
+ /* If we expect an interrupt for the next entry, tell host
723
+ * by writing event index and flush out the write before
724
+ * the read in the next get_buf call. */
725
+ if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
726
+ virtio_store_mb(vq->weak_barriers,
727
+ &vring_used_event(&vq->split.vring),
728
+ cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
729
+
730
+ LAST_ADD_TIME_INVALID(vq);
731
+
732
+ END_USE(vq);
733
+ return ret;
734
+}
735
+
736
+static void virtqueue_disable_cb_split(struct virtqueue *_vq)
737
+{
738
+ struct vring_virtqueue *vq = to_vvq(_vq);
739
+
740
+ if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
741
+ vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
742
+ if (!vq->event)
743
+ vq->split.vring.avail->flags =
744
+ cpu_to_virtio16(_vq->vdev,
745
+ vq->split.avail_flags_shadow);
746
+ }
747
+}
748
+
749
+static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
750
+{
751
+ struct vring_virtqueue *vq = to_vvq(_vq);
752
+ u16 last_used_idx;
753
+
754
+ START_USE(vq);
755
+
756
+ /* We optimistically turn back on interrupts, then check if there was
757
+ * more to do. */
758
+ /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
759
+ * either clear the flags bit or point the event index at the next
760
+ * entry. Always do both to keep code simple. */
761
+ if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
762
+ vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
763
+ if (!vq->event)
764
+ vq->split.vring.avail->flags =
765
+ cpu_to_virtio16(_vq->vdev,
766
+ vq->split.avail_flags_shadow);
767
+ }
768
+ vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
769
+ last_used_idx = vq->last_used_idx);
770
+ END_USE(vq);
771
+ return last_used_idx;
772
+}
773
+
774
+static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
775
+{
776
+ struct vring_virtqueue *vq = to_vvq(_vq);
777
+
778
+ return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
779
+ vq->split.vring.used->idx);
780
+}
781
+
782
+static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
783
+{
784
+ struct vring_virtqueue *vq = to_vvq(_vq);
785
+ u16 bufs;
786
+
787
+ START_USE(vq);
788
+
789
+ /* We optimistically turn back on interrupts, then check if there was
790
+ * more to do. */
791
+ /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
792
+ * either clear the flags bit or point the event index at the next
793
+ * entry. Always update the event index to keep code simple. */
794
+ if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
795
+ vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
796
+ if (!vq->event)
797
+ vq->split.vring.avail->flags =
798
+ cpu_to_virtio16(_vq->vdev,
799
+ vq->split.avail_flags_shadow);
800
+ }
801
+ /* TODO: tune this threshold */
802
+ bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
803
+
804
+ virtio_store_mb(vq->weak_barriers,
805
+ &vring_used_event(&vq->split.vring),
806
+ cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
807
+
808
+ if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
809
+ - vq->last_used_idx) > bufs)) {
810
+ END_USE(vq);
811
+ return false;
812
+ }
813
+
814
+ END_USE(vq);
815
+ return true;
816
+}
817
+
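Editor's note: virtqueue_enable_cb_delayed_split() above programs used_event so the device interrupts only after roughly three quarters of the currently outstanding buffers have been consumed. A small sketch of just that index computation, assuming 16-bit ring indices (the 3/4 factor is a tunable heuristic, per the TODO above):

#include <stdint.h>
#include <stdio.h>

static uint16_t delayed_used_event(uint16_t avail_idx, uint16_t last_used_idx)
{
	uint16_t outstanding = avail_idx - last_used_idx;
	uint16_t bufs = outstanding * 3 / 4;   /* the tunable 3/4 threshold */

	return last_used_idx + bufs;           /* value programmed as used_event */
}

int main(void)
{
	printf("%u\n", (unsigned)delayed_used_event(100, 20));  /* 80           */
	printf("%u\n", (unsigned)delayed_used_event(4, 65500)); /* 65530: wraps */
	return 0;
}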
818
+static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
819
+{
820
+ struct vring_virtqueue *vq = to_vvq(_vq);
821
+ unsigned int i;
822
+ void *buf;
823
+
824
+ START_USE(vq);
825
+
826
+ for (i = 0; i < vq->split.vring.num; i++) {
827
+ if (!vq->split.desc_state[i].data)
828
+ continue;
829
+ /* detach_buf_split clears data, so grab it now. */
830
+ buf = vq->split.desc_state[i].data;
831
+ detach_buf_split(vq, i, NULL);
832
+ vq->split.avail_idx_shadow--;
833
+ vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
834
+ vq->split.avail_idx_shadow);
835
+ END_USE(vq);
836
+ return buf;
837
+ }
838
+ /* That should have freed everything. */
839
+ BUG_ON(vq->vq.num_free != vq->split.vring.num);
840
+
841
+ END_USE(vq);
842
+ return NULL;
843
+}
844
+
845
+static struct virtqueue *vring_create_virtqueue_split(
846
+ unsigned int index,
847
+ unsigned int num,
848
+ unsigned int vring_align,
849
+ struct virtio_device *vdev,
850
+ bool weak_barriers,
851
+ bool may_reduce_num,
852
+ bool context,
853
+ bool (*notify)(struct virtqueue *),
854
+ void (*callback)(struct virtqueue *),
855
+ const char *name)
856
+{
857
+ struct virtqueue *vq;
858
+ void *queue = NULL;
859
+ dma_addr_t dma_addr;
860
+ size_t queue_size_in_bytes;
861
+ struct vring vring;
862
+
863
+ /* We assume num is a power of 2. */
864
+ if (num & (num - 1)) {
865
+ dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
866
+ return NULL;
867
+ }
868
+
869
+ /* TODO: allocate each queue chunk individually */
870
+ for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
871
+ queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
872
+ &dma_addr,
873
+ GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
874
+ if (queue)
875
+ break;
876
+ if (!may_reduce_num)
877
+ return NULL;
878
+ }
879
+
880
+ if (!num)
881
+ return NULL;
882
+
883
+ if (!queue) {
884
+ /* Try to get a single page. You are my only hope! */
885
+ queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
886
+ &dma_addr, GFP_KERNEL|__GFP_ZERO);
887
+ }
888
+ if (!queue)
889
+ return NULL;
890
+
891
+ queue_size_in_bytes = vring_size(num, vring_align);
892
+ vring_init(&vring, num, queue, vring_align);
893
+
894
+ vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
895
+ notify, callback, name);
896
+ if (!vq) {
897
+ vring_free_queue(vdev, queue_size_in_bytes, queue,
898
+ dma_addr);
899
+ return NULL;
900
+ }
901
+
902
+ to_vvq(vq)->split.queue_dma_addr = dma_addr;
903
+ to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
904
+ to_vvq(vq)->we_own_ring = true;
905
+
906
+ return vq;
907
+}
908
+
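Editor's note: vring_create_virtqueue_split() above insists on a power-of-two queue size and, when may_reduce_num allows it, keeps halving num until a contiguous allocation succeeds. The sketch below keeps only that shrink-and-retry shape with a stand-in allocator; the PAGE_SIZE test, the __GFP_NOWARN retry and the final single-page attempt are deliberately left out.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in allocator: pretend only 8 KiB of contiguous memory exists. */
static bool demo_alloc(unsigned int bytes)
{
	return bytes <= 8192;
}

static unsigned int pick_ring_size(unsigned int num, unsigned int desc_bytes)
{
	if (num & (num - 1))     /* same power-of-two check as above */
		return 0;
	for (; num && !demo_alloc(num * desc_bytes); num /= 2)
		;                /* halve and retry until something fits */
	return num;
}

int main(void)
{
	printf("%u\n", pick_ring_size(1024, 16));  /* 512: 16 KiB didn't fit */
	printf("%u\n", pick_ring_size(100, 16));   /* 0: not a power of two  */
	return 0;
}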
909
+
910
+/*
911
+ * Packed ring specific functions - *_packed().
912
+ */
913
+
914
+static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
915
+ struct vring_desc_extra_packed *state)
916
+{
917
+ u16 flags;
918
+
919
+ if (!vq->use_dma_api)
920
+ return;
921
+
922
+ flags = state->flags;
923
+
924
+ if (flags & VRING_DESC_F_INDIRECT) {
925
+ dma_unmap_single(vring_dma_dev(vq),
926
+ state->addr, state->len,
927
+ (flags & VRING_DESC_F_WRITE) ?
928
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
929
+ } else {
930
+ dma_unmap_page(vring_dma_dev(vq),
931
+ state->addr, state->len,
932
+ (flags & VRING_DESC_F_WRITE) ?
933
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
934
+ }
935
+}
936
+
937
+static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
938
+ struct vring_packed_desc *desc)
939
+{
940
+ u16 flags;
941
+
942
+ if (!vq->use_dma_api)
943
+ return;
944
+
945
+ flags = le16_to_cpu(desc->flags);
946
+
947
+ if (flags & VRING_DESC_F_INDIRECT) {
948
+ dma_unmap_single(vring_dma_dev(vq),
949
+ le64_to_cpu(desc->addr),
950
+ le32_to_cpu(desc->len),
951
+ (flags & VRING_DESC_F_WRITE) ?
952
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
953
+ } else {
954
+ dma_unmap_page(vring_dma_dev(vq),
955
+ le64_to_cpu(desc->addr),
956
+ le32_to_cpu(desc->len),
957
+ (flags & VRING_DESC_F_WRITE) ?
958
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
959
+ }
960
+}
961
+
962
+static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
963
+ gfp_t gfp)
964
+{
965
+ struct vring_packed_desc *desc;
966
+
967
+ /*
968
+ * We require lowmem mappings for the descriptors because
969
+ * otherwise virt_to_phys will give us bogus addresses in the
970
+ * virtqueue.
971
+ */
972
+ gfp &= ~__GFP_HIGHMEM;
973
+
974
+ desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
975
+
976
+ return desc;
977
+}
978
+
979
+static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
980
+ struct scatterlist *sgs[],
981
+ unsigned int total_sg,
982
+ unsigned int out_sgs,
983
+ unsigned int in_sgs,
984
+ void *data,
985
+ gfp_t gfp)
986
+{
987
+ struct vring_packed_desc *desc;
988
+ struct scatterlist *sg;
989
+ unsigned int i, n, err_idx;
990
+ u16 head, id;
991
+ dma_addr_t addr;
992
+
993
+ head = vq->packed.next_avail_idx;
994
+ desc = alloc_indirect_packed(total_sg, gfp);
995
+ if (!desc)
996
+ return -ENOMEM;
997
+
998
+ if (unlikely(vq->vq.num_free < 1)) {
999
+ pr_debug("Can't add buf len 1 - avail = 0\n");
1000
+ kfree(desc);
1001
+ END_USE(vq);
1002
+ return -ENOSPC;
1003
+ }
1004
+
1005
+ i = 0;
1006
+ id = vq->free_head;
1007
+ BUG_ON(id == vq->packed.vring.num);
1008
+
1009
+ for (n = 0; n < out_sgs + in_sgs; n++) {
1010
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1011
+ addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1012
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
1013
+ if (vring_mapping_error(vq, addr))
1014
+ goto unmap_release;
1015
+
1016
+ desc[i].flags = cpu_to_le16(n < out_sgs ?
1017
+ 0 : VRING_DESC_F_WRITE);
1018
+ desc[i].addr = cpu_to_le64(addr);
1019
+ desc[i].len = cpu_to_le32(sg->length);
1020
+ i++;
1021
+ }
1022
+ }
1023
+
1024
+ /* Now that the indirect table is filled in, map it. */
1025
+ addr = vring_map_single(vq, desc,
1026
+ total_sg * sizeof(struct vring_packed_desc),
1027
+ DMA_TO_DEVICE);
1028
+ if (vring_mapping_error(vq, addr))
1029
+ goto unmap_release;
1030
+
1031
+ vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1032
+ vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1033
+ sizeof(struct vring_packed_desc));
1034
+ vq->packed.vring.desc[head].id = cpu_to_le16(id);
1035
+
1036
+ if (vq->use_dma_api) {
1037
+ vq->packed.desc_extra[id].addr = addr;
1038
+ vq->packed.desc_extra[id].len = total_sg *
1039
+ sizeof(struct vring_packed_desc);
1040
+ vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1041
+ vq->packed.avail_used_flags;
1042
+ }
1043
+
1044
+ /*
1045
+ * A driver MUST NOT make the first descriptor in the list
1046
+ * available before all subsequent descriptors comprising
1047
+ * the list are made available.
1048
+ */
1049
+ virtio_wmb(vq->weak_barriers);
1050
+ vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1051
+ vq->packed.avail_used_flags);
1052
+
1053
+ /* We're using some buffers from the free list. */
1054
+ vq->vq.num_free -= 1;
1055
+
1056
+ /* Update free pointer */
1057
+ n = head + 1;
1058
+ if (n >= vq->packed.vring.num) {
1059
+ n = 0;
1060
+ vq->packed.avail_wrap_counter ^= 1;
1061
+ vq->packed.avail_used_flags ^=
1062
+ 1 << VRING_PACKED_DESC_F_AVAIL |
1063
+ 1 << VRING_PACKED_DESC_F_USED;
1064
+ }
1065
+ vq->packed.next_avail_idx = n;
1066
+ vq->free_head = vq->packed.desc_state[id].next;
1067
+
1068
+ /* Store token and indirect buffer state. */
1069
+ vq->packed.desc_state[id].num = 1;
1070
+ vq->packed.desc_state[id].data = data;
1071
+ vq->packed.desc_state[id].indir_desc = desc;
1072
+ vq->packed.desc_state[id].last = id;
1073
+
1074
+ vq->num_added += 1;
1075
+
1076
+ pr_debug("Added buffer head %i to %p\n", head, vq);
1077
+ END_USE(vq);
1078
+
1079
+ return 0;
1080
+
1081
+unmap_release:
1082
+ err_idx = i;
1083
+
1084
+ for (i = 0; i < err_idx; i++)
1085
+ vring_unmap_desc_packed(vq, &desc[i]);
1086
+
1087
+ kfree(desc);
1088
+
1089
+ END_USE(vq);
1090
+ return -ENOMEM;
1091
+}
1092
+
1093
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
1094
+ struct scatterlist *sgs[],
1095
+ unsigned int total_sg,
1096
+ unsigned int out_sgs,
1097
+ unsigned int in_sgs,
1098
+ void *data,
1099
+ void *ctx,
1100
+ gfp_t gfp)
1101
+{
1102
+ struct vring_virtqueue *vq = to_vvq(_vq);
1103
+ struct vring_packed_desc *desc;
1104
+ struct scatterlist *sg;
1105
+ unsigned int i, n, c, descs_used, err_idx;
1106
+ __le16 head_flags, flags;
1107
+ u16 head, id, prev, curr, avail_used_flags;
1108
+ int err;
1109
+
1110
+ START_USE(vq);
1111
+
1112
+ BUG_ON(data == NULL);
1113
+ BUG_ON(ctx && vq->indirect);
1114
+
1115
+ if (unlikely(vq->broken)) {
1116
+ END_USE(vq);
1117
+ return -EIO;
1118
+ }
1119
+
1120
+ LAST_ADD_TIME_UPDATE(vq);
1121
+
1122
+ BUG_ON(total_sg == 0);
1123
+
1124
+ if (virtqueue_use_indirect(_vq, total_sg)) {
1125
+ err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1126
+ in_sgs, data, gfp);
1127
+ if (err != -ENOMEM) {
1128
+ END_USE(vq);
1129
+ return err;
1130
+ }
1131
+
1132
+ /* fall back on direct */
1133
+ }
1134
+
1135
+ head = vq->packed.next_avail_idx;
1136
+ avail_used_flags = vq->packed.avail_used_flags;
1137
+
1138
+ WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1139
+
1140
+ desc = vq->packed.vring.desc;
1141
+ i = head;
1142
+ descs_used = total_sg;
1143
+
1144
+ if (unlikely(vq->vq.num_free < descs_used)) {
1145
+ pr_debug("Can't add buf len %i - avail = %i\n",
1146
+ descs_used, vq->vq.num_free);
1147
+ END_USE(vq);
1148
+ return -ENOSPC;
1149
+ }
1150
+
1151
+ id = vq->free_head;
1152
+ BUG_ON(id == vq->packed.vring.num);
1153
+
1154
+ curr = id;
1155
+ c = 0;
1156
+ for (n = 0; n < out_sgs + in_sgs; n++) {
1157
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1158
+ dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1159
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
1160
+ if (vring_mapping_error(vq, addr))
1161
+ goto unmap_release;
1162
+
1163
+ flags = cpu_to_le16(vq->packed.avail_used_flags |
1164
+ (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1165
+ (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1166
+ if (i == head)
1167
+ head_flags = flags;
1168
+ else
1169
+ desc[i].flags = flags;
1170
+
1171
+ desc[i].addr = cpu_to_le64(addr);
1172
+ desc[i].len = cpu_to_le32(sg->length);
1173
+ desc[i].id = cpu_to_le16(id);
1174
+
1175
+ if (unlikely(vq->use_dma_api)) {
1176
+ vq->packed.desc_extra[curr].addr = addr;
1177
+ vq->packed.desc_extra[curr].len = sg->length;
1178
+ vq->packed.desc_extra[curr].flags =
1179
+ le16_to_cpu(flags);
1180
+ }
1181
+ prev = curr;
1182
+ curr = vq->packed.desc_state[curr].next;
1183
+
1184
+ if ((unlikely(++i >= vq->packed.vring.num))) {
1185
+ i = 0;
1186
+ vq->packed.avail_used_flags ^=
1187
+ 1 << VRING_PACKED_DESC_F_AVAIL |
1188
+ 1 << VRING_PACKED_DESC_F_USED;
1189
+ }
1190
+ }
1191
+ }
1192
+
1193
+ if (i <= head)
1194
+ vq->packed.avail_wrap_counter ^= 1;
1195
+
1196
+ /* We're using some buffers from the free list. */
1197
+ vq->vq.num_free -= descs_used;
1198
+
1199
+ /* Update free pointer */
1200
+ vq->packed.next_avail_idx = i;
1201
+ vq->free_head = curr;
1202
+
1203
+ /* Store token. */
1204
+ vq->packed.desc_state[id].num = descs_used;
1205
+ vq->packed.desc_state[id].data = data;
1206
+ vq->packed.desc_state[id].indir_desc = ctx;
1207
+ vq->packed.desc_state[id].last = prev;
1208
+
1209
+ /*
1210
+ * A driver MUST NOT make the first descriptor in the list
1211
+ * available before all subsequent descriptors comprising
1212
+ * the list are made available.
1213
+ */
1214
+ virtio_wmb(vq->weak_barriers);
1215
+ vq->packed.vring.desc[head].flags = head_flags;
1216
+ vq->num_added += descs_used;
1217
+
1218
+ pr_debug("Added buffer head %i to %p\n", head, vq);
1219
+ END_USE(vq);
1220
+
1221
+ return 0;
1222
+
1223
+unmap_release:
1224
+ err_idx = i;
1225
+ i = head;
1226
+
1227
+ vq->packed.avail_used_flags = avail_used_flags;
1228
+
1229
+ for (n = 0; n < total_sg; n++) {
1230
+ if (i == err_idx)
1231
+ break;
1232
+ vring_unmap_desc_packed(vq, &desc[i]);
1233
+ i++;
1234
+ if (i >= vq->packed.vring.num)
1235
+ i = 0;
1236
+ }
1237
+
1238
+ END_USE(vq);
1239
+ return -EIO;
1240
+}
1241
+
1242
+static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1243
+{
1244
+ struct vring_virtqueue *vq = to_vvq(_vq);
1245
+ u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1246
+ bool needs_kick;
1247
+ union {
1248
+ struct {
1249
+ __le16 off_wrap;
1250
+ __le16 flags;
1251
+ };
1252
+ u32 u32;
1253
+ } snapshot;
1254
+
1255
+ START_USE(vq);
1256
+
1257
+ /*
1258
+ * We need to expose the new flags value before checking notification
1259
+ * suppressions.
1260
+ */
1261
+ virtio_mb(vq->weak_barriers);
1262
+
1263
+ old = vq->packed.next_avail_idx - vq->num_added;
1264
+ new = vq->packed.next_avail_idx;
1265
+ vq->num_added = 0;
1266
+
1267
+ snapshot.u32 = *(u32 *)vq->packed.vring.device;
1268
+ flags = le16_to_cpu(snapshot.flags);
1269
+
1270
+ LAST_ADD_TIME_CHECK(vq);
1271
+ LAST_ADD_TIME_INVALID(vq);
1272
+
1273
+ if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1274
+ needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1275
+ goto out;
1276
+ }
1277
+
1278
+ off_wrap = le16_to_cpu(snapshot.off_wrap);
1279
+
1280
+ wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1281
+ event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1282
+ if (wrap_counter != vq->packed.avail_wrap_counter)
1283
+ event_idx -= vq->packed.vring.num;
1284
+
1285
+ needs_kick = vring_need_event(event_idx, new, old);
1286
+out:
1287
+ END_USE(vq);
1288
+ return needs_kick;
1289
+}
1290
+
1291
+static void detach_buf_packed(struct vring_virtqueue *vq,
1292
+ unsigned int id, void **ctx)
1293
+{
1294
+ struct vring_desc_state_packed *state = NULL;
1295
+ struct vring_packed_desc *desc;
1296
+ unsigned int i, curr;
1297
+
1298
+ state = &vq->packed.desc_state[id];
1299
+
1300
+ /* Clear data ptr. */
1301
+ state->data = NULL;
1302
+
1303
+ vq->packed.desc_state[state->last].next = vq->free_head;
1304
+ vq->free_head = id;
1305
+ vq->vq.num_free += state->num;
1306
+
1307
+ if (unlikely(vq->use_dma_api)) {
1308
+ curr = id;
1309
+ for (i = 0; i < state->num; i++) {
1310
+ vring_unmap_state_packed(vq,
1311
+ &vq->packed.desc_extra[curr]);
1312
+ curr = vq->packed.desc_state[curr].next;
1313
+ }
1314
+ }
1315
+
1316
+ if (vq->indirect) {
1317
+ u32 len;
1318
+
1319
+ /* Free the indirect table, if any, now that it's unmapped. */
1320
+ desc = state->indir_desc;
1321
+ if (!desc)
1322
+ return;
1323
+
1324
+ if (vq->use_dma_api) {
1325
+ len = vq->packed.desc_extra[id].len;
1326
+ for (i = 0; i < len / sizeof(struct vring_packed_desc);
1327
+ i++)
1328
+ vring_unmap_desc_packed(vq, &desc[i]);
1329
+ }
1330
+ kfree(desc);
1331
+ state->indir_desc = NULL;
1332
+ } else if (ctx) {
1333
+ *ctx = state->indir_desc;
1334
+ }
1335
+}
1336
+
1337
+static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1338
+ u16 idx, bool used_wrap_counter)
1339
+{
1340
+ bool avail, used;
1341
+ u16 flags;
1342
+
1343
+ flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1344
+ avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1345
+ used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1346
+
1347
+ return avail == used && used == used_wrap_counter;
1348
+}
1349
+
1350
+static inline bool more_used_packed(const struct vring_virtqueue *vq)
1351
+{
1352
+ return is_used_desc_packed(vq, vq->last_used_idx,
1353
+ vq->packed.used_wrap_counter);
1354
+}
1355
+
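Editor's note: is_used_desc_packed()/more_used_packed() above implement the packed-ring completion test: a descriptor is used once the device has written its AVAIL and USED flag bits equal to each other and to the wrap counter the driver currently expects. The bit positions mirror VRING_PACKED_DESC_F_AVAIL (7) and VRING_PACKED_DESC_F_USED (15); the DEMO_ names below are stand-ins.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_F_AVAIL 7    /* mirrors VRING_PACKED_DESC_F_AVAIL */
#define DEMO_F_USED  15   /* mirrors VRING_PACKED_DESC_F_USED  */

static bool desc_is_used(uint16_t flags, bool used_wrap_counter)
{
	bool avail = flags & (1 << DEMO_F_AVAIL);
	bool used = flags & (1 << DEMO_F_USED);

	return avail == used && used == used_wrap_counter;
}

int main(void)
{
	/* Driver publishes the slot in the current wrap: avail=1, used=0. */
	uint16_t flags = 1 << DEMO_F_AVAIL;

	printf("%d\n", desc_is_used(flags, true));   /* 0: still in flight */

	/* Device completes it: both bits now equal the expected counter. */
	flags |= 1 << DEMO_F_USED;
	printf("%d\n", desc_is_used(flags, true));   /* 1: used            */
	return 0;
}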
1356
+static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1357
+ unsigned int *len,
1358
+ void **ctx)
1359
+{
1360
+ struct vring_virtqueue *vq = to_vvq(_vq);
1361
+ u16 last_used, id;
1362
+ void *ret;
1363
+
1364
+ START_USE(vq);
1365
+
1366
+ if (unlikely(vq->broken)) {
1367
+ END_USE(vq);
1368
+ return NULL;
1369
+ }
1370
+
1371
+ if (!more_used_packed(vq)) {
1372
+ pr_debug("No more buffers in queue\n");
1373
+ END_USE(vq);
1374
+ return NULL;
1375
+ }
1376
+
1377
+ /* Only get used elements after they have been exposed by host. */
1378
+ virtio_rmb(vq->weak_barriers);
1379
+
1380
+ last_used = vq->last_used_idx;
1381
+ id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1382
+ *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1383
+
1384
+ if (unlikely(id >= vq->packed.vring.num)) {
1385
+ BAD_RING(vq, "id %u out of range\n", id);
1386
+ return NULL;
1387
+ }
1388
+ if (unlikely(!vq->packed.desc_state[id].data)) {
1389
+ BAD_RING(vq, "id %u is not a head!\n", id);
1390
+ return NULL;
1391
+ }
1392
+
1393
+ /* detach_buf_packed clears data, so grab it now. */
1394
+ ret = vq->packed.desc_state[id].data;
1395
+ detach_buf_packed(vq, id, ctx);
1396
+
1397
+ vq->last_used_idx += vq->packed.desc_state[id].num;
1398
+ if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1399
+ vq->last_used_idx -= vq->packed.vring.num;
1400
+ vq->packed.used_wrap_counter ^= 1;
1401
+ }
1402
+
1403
+ /*
1404
+ * If we expect an interrupt for the next entry, tell host
1405
+ * by writing event index and flush out the write before
1406
+ * the read in the next get_buf call.
1407
+ */
1408
+ if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1409
+ virtio_store_mb(vq->weak_barriers,
1410
+ &vq->packed.vring.driver->off_wrap,
1411
+ cpu_to_le16(vq->last_used_idx |
1412
+ (vq->packed.used_wrap_counter <<
1413
+ VRING_PACKED_EVENT_F_WRAP_CTR)));
1414
+
1415
+ LAST_ADD_TIME_INVALID(vq);
1416
+
1417
+ END_USE(vq);
1418
+ return ret;
1419
+}
1420
+
1421
+static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1422
+{
1423
+ struct vring_virtqueue *vq = to_vvq(_vq);
1424
+
1425
+ if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1426
+ vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1427
+ vq->packed.vring.driver->flags =
1428
+ cpu_to_le16(vq->packed.event_flags_shadow);
1429
+ }
1430
+}
1431
+
1432
+static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1433
+{
1434
+ struct vring_virtqueue *vq = to_vvq(_vq);
1435
+
1436
+ START_USE(vq);
1437
+
1438
+ /*
1439
+ * We optimistically turn back on interrupts, then check if there was
1440
+ * more to do.
1441
+ */
1442
+
1443
+ if (vq->event) {
1444
+ vq->packed.vring.driver->off_wrap =
1445
+ cpu_to_le16(vq->last_used_idx |
1446
+ (vq->packed.used_wrap_counter <<
1447
+ VRING_PACKED_EVENT_F_WRAP_CTR));
1448
+ /*
1449
+ * We need to update event offset and event wrap
1450
+ * counter first before updating event flags.
1451
+ */
1452
+ virtio_wmb(vq->weak_barriers);
1453
+ }
1454
+
1455
+ if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1456
+ vq->packed.event_flags_shadow = vq->event ?
1457
+ VRING_PACKED_EVENT_FLAG_DESC :
1458
+ VRING_PACKED_EVENT_FLAG_ENABLE;
1459
+ vq->packed.vring.driver->flags =
1460
+ cpu_to_le16(vq->packed.event_flags_shadow);
1461
+ }
1462
+
1463
+ END_USE(vq);
1464
+ return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1465
+ VRING_PACKED_EVENT_F_WRAP_CTR);
1466
+}
1467
+
1468
+static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1469
+{
1470
+ struct vring_virtqueue *vq = to_vvq(_vq);
1471
+ bool wrap_counter;
1472
+ u16 used_idx;
1473
+
1474
+ wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1475
+ used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1476
+
1477
+ return is_used_desc_packed(vq, used_idx, wrap_counter);
1478
+}
1479
+
1480
+static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1481
+{
1482
+ struct vring_virtqueue *vq = to_vvq(_vq);
1483
+ u16 used_idx, wrap_counter;
1484
+ u16 bufs;
1485
+
1486
+ START_USE(vq);
1487
+
1488
+ /*
1489
+ * We optimistically turn back on interrupts, then check if there was
1490
+ * more to do.
1491
+ */
1492
+
1493
+ if (vq->event) {
1494
+ /* TODO: tune this threshold */
1495
+ bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1496
+ wrap_counter = vq->packed.used_wrap_counter;
1497
+
1498
+ used_idx = vq->last_used_idx + bufs;
1499
+ if (used_idx >= vq->packed.vring.num) {
1500
+ used_idx -= vq->packed.vring.num;
1501
+ wrap_counter ^= 1;
1502
+ }
1503
+
1504
+ vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1505
+ (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1506
+
1507
+ /*
1508
+ * We need to update event offset and event wrap
1509
+ * counter first before updating event flags.
1510
+ */
1511
+ virtio_wmb(vq->weak_barriers);
1512
+ }
1513
+
1514
+ if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1515
+ vq->packed.event_flags_shadow = vq->event ?
1516
+ VRING_PACKED_EVENT_FLAG_DESC :
1517
+ VRING_PACKED_EVENT_FLAG_ENABLE;
1518
+ vq->packed.vring.driver->flags =
1519
+ cpu_to_le16(vq->packed.event_flags_shadow);
1520
+ }
1521
+
1522
+ /*
1523
+ * We need to update event suppression structure first
1524
+ * before re-checking for more used buffers.
1525
+ */
1526
+ virtio_mb(vq->weak_barriers);
1527
+
1528
+ if (is_used_desc_packed(vq,
1529
+ vq->last_used_idx,
1530
+ vq->packed.used_wrap_counter)) {
1531
+ END_USE(vq);
1532
+ return false;
1533
+ }
1534
+
1535
+ END_USE(vq);
1536
+ return true;
1537
+}
1538
+
1539
+static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1540
+{
1541
+ struct vring_virtqueue *vq = to_vvq(_vq);
1542
+ unsigned int i;
1543
+ void *buf;
1544
+
1545
+ START_USE(vq);
1546
+
1547
+ for (i = 0; i < vq->packed.vring.num; i++) {
1548
+ if (!vq->packed.desc_state[i].data)
1549
+ continue;
1550
+ /* detach_buf clears data, so grab it now. */
1551
+ buf = vq->packed.desc_state[i].data;
1552
+ detach_buf_packed(vq, i, NULL);
1553
+ END_USE(vq);
1554
+ return buf;
1555
+ }
1556
+ /* That should have freed everything. */
1557
+ BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1558
+
1559
+ END_USE(vq);
1560
+ return NULL;
1561
+}
1562
+
1563
+static struct virtqueue *vring_create_virtqueue_packed(
1564
+ unsigned int index,
1565
+ unsigned int num,
1566
+ unsigned int vring_align,
1567
+ struct virtio_device *vdev,
1568
+ bool weak_barriers,
1569
+ bool may_reduce_num,
1570
+ bool context,
1571
+ bool (*notify)(struct virtqueue *),
1572
+ void (*callback)(struct virtqueue *),
1573
+ const char *name)
1574
+{
1575
+ struct vring_virtqueue *vq;
1576
+ struct vring_packed_desc *ring;
1577
+ struct vring_packed_desc_event *driver, *device;
1578
+ dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1579
+ size_t ring_size_in_bytes, event_size_in_bytes;
1580
+ unsigned int i;
1581
+
1582
+ ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1583
+
1584
+ ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1585
+ &ring_dma_addr,
1586
+ GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1587
+ if (!ring)
1588
+ goto err_ring;
1589
+
1590
+ event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1591
+
1592
+ driver = vring_alloc_queue(vdev, event_size_in_bytes,
1593
+ &driver_event_dma_addr,
1594
+ GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1595
+ if (!driver)
1596
+ goto err_driver;
1597
+
1598
+ device = vring_alloc_queue(vdev, event_size_in_bytes,
1599
+ &device_event_dma_addr,
1600
+ GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1601
+ if (!device)
1602
+ goto err_device;
1603
+
1604
+ vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1605
+ if (!vq)
1606
+ goto err_vq;
1607
+
1608
+ vq->vq.callback = callback;
1609
+ vq->vq.vdev = vdev;
1610
+ vq->vq.name = name;
1611
+ vq->vq.num_free = num;
1612
+ vq->vq.index = index;
1613
+ vq->we_own_ring = true;
1614
+ vq->notify = notify;
1615
+ vq->weak_barriers = weak_barriers;
1616
+ vq->broken = false;
1617
+ vq->last_used_idx = 0;
1618
+ vq->num_added = 0;
1619
+ vq->packed_ring = true;
1620
+ vq->use_dma_api = vring_use_dma_api(vdev);
1621
+#ifdef DEBUG
1622
+ vq->in_use = false;
1623
+ vq->last_add_time_valid = false;
1624
+#endif
1625
+
1626
+ vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1627
+ !context;
1628
+ vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1629
+
1630
+ if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1631
+ vq->weak_barriers = false;
1632
+
1633
+ vq->packed.ring_dma_addr = ring_dma_addr;
1634
+ vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1635
+ vq->packed.device_event_dma_addr = device_event_dma_addr;
1636
+
1637
+ vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1638
+ vq->packed.event_size_in_bytes = event_size_in_bytes;
1639
+
1640
+ vq->packed.vring.num = num;
1641
+ vq->packed.vring.desc = ring;
1642
+ vq->packed.vring.driver = driver;
1643
+ vq->packed.vring.device = device;
1644
+
1645
+ vq->packed.next_avail_idx = 0;
1646
+ vq->packed.avail_wrap_counter = 1;
1647
+ vq->packed.used_wrap_counter = 1;
1648
+ vq->packed.event_flags_shadow = 0;
1649
+ vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1650
+
1651
+ vq->packed.desc_state = kmalloc_array(num,
1652
+ sizeof(struct vring_desc_state_packed),
1653
+ GFP_KERNEL);
1654
+ if (!vq->packed.desc_state)
1655
+ goto err_desc_state;
1656
+
1657
+ memset(vq->packed.desc_state, 0,
1658
+ num * sizeof(struct vring_desc_state_packed));
1659
+
1660
+ /* Put everything in free lists. */
1661
+ vq->free_head = 0;
1662
+ for (i = 0; i < num-1; i++)
1663
+ vq->packed.desc_state[i].next = i + 1;
1664
+
1665
+ vq->packed.desc_extra = kmalloc_array(num,
1666
+ sizeof(struct vring_desc_extra_packed),
1667
+ GFP_KERNEL);
1668
+ if (!vq->packed.desc_extra)
1669
+ goto err_desc_extra;
1670
+
1671
+ memset(vq->packed.desc_extra, 0,
1672
+ num * sizeof(struct vring_desc_extra_packed));
1673
+
1674
+ /* No callback? Tell other side not to bother us. */
1675
+ if (!callback) {
1676
+ vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1677
+ vq->packed.vring.driver->flags =
1678
+ cpu_to_le16(vq->packed.event_flags_shadow);
1679
+ }
1680
+
1681
+ list_add_tail(&vq->vq.list, &vdev->vqs);
1682
+ return &vq->vq;
1683
+
1684
+err_desc_extra:
1685
+ kfree(vq->packed.desc_state);
1686
+err_desc_state:
1687
+ kfree(vq);
1688
+err_vq:
1689
+ vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1690
+err_device:
1691
+ vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1692
+err_driver:
1693
+ vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1694
+err_ring:
1695
+ return NULL;
1696
+}
1697
+
1698
+
1699
+/*
1700
+ * Generic functions and exported symbols.
1701
+ */
1702
+
1703
+static inline int virtqueue_add(struct virtqueue *_vq,
1704
+ struct scatterlist *sgs[],
1705
+ unsigned int total_sg,
1706
+ unsigned int out_sgs,
1707
+ unsigned int in_sgs,
1708
+ void *data,
1709
+ void *ctx,
1710
+ gfp_t gfp)
1711
+{
1712
+ struct vring_virtqueue *vq = to_vvq(_vq);
1713
+
1714
+ return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1715
+ out_sgs, in_sgs, data, ctx, gfp) :
1716
+ virtqueue_add_split(_vq, sgs, total_sg,
1717
+ out_sgs, in_sgs, data, ctx, gfp);
1718
+}
1719
+
4371720 /**
4381721 * virtqueue_add_sgs - expose buffers to other end
439
- * @vq: the struct virtqueue we're talking about.
1722
+ * @_vq: the struct virtqueue we're talking about.
4401723 * @sgs: array of terminated scatterlists.
441
- * @out_num: the number of scatterlists readable by other side
442
- * @in_num: the number of scatterlists which are writable (after readable ones)
1724
+ * @out_sgs: the number of scatterlists readable by other side
1725
+ * @in_sgs: the number of scatterlists which are writable (after readable ones)
4431726 * @data: the token identifying the buffer.
4441727 * @gfp: how to do memory allocations (if necessary).
4451728 *
@@ -460,6 +1743,7 @@
4601743 /* Count them first. */
4611744 for (i = 0; i < out_sgs + in_sgs; i++) {
4621745 struct scatterlist *sg;
1746
+
4631747 for (sg = sgs[i]; sg; sg = sg_next(sg))
4641748 total_sg++;
4651749 }
@@ -538,7 +1822,7 @@
5381822
5391823 /**
5401824 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
541
- * @vq: the struct virtqueue
1825
+ * @_vq: the struct virtqueue
5421826 *
5431827 * Instead of virtqueue_kick(), you can do:
5441828 * if (virtqueue_kick_prepare(vq))
@@ -550,40 +1834,15 @@
5501834 bool virtqueue_kick_prepare(struct virtqueue *_vq)
5511835 {
5521836 struct vring_virtqueue *vq = to_vvq(_vq);
553
- u16 new, old;
554
- bool needs_kick;
5551837
556
- START_USE(vq);
557
- /* We need to expose available array entries before checking avail
558
- * event. */
559
- virtio_mb(vq->weak_barriers);
560
-
561
- old = vq->avail_idx_shadow - vq->num_added;
562
- new = vq->avail_idx_shadow;
563
- vq->num_added = 0;
564
-
565
-#ifdef DEBUG
566
- if (vq->last_add_time_valid) {
567
- WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
568
- vq->last_add_time)) > 100);
569
- }
570
- vq->last_add_time_valid = false;
571
-#endif
572
-
573
- if (vq->event) {
574
- needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
575
- new, old);
576
- } else {
577
- needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
578
- }
579
- END_USE(vq);
580
- return needs_kick;
1838
+ return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1839
+ virtqueue_kick_prepare_split(_vq);
5811840 }
5821841 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
5831842
5841843 /**
5851844 * virtqueue_notify - second half of split virtqueue_kick call.
586
- * @vq: the struct virtqueue
1845
+ * @_vq: the struct virtqueue
5871846 *
5881847 * This does not need to be serialized.
5891848 *
@@ -625,64 +1884,11 @@
6251884 }
6261885 EXPORT_SYMBOL_GPL(virtqueue_kick);
6271886
628
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
629
- void **ctx)
630
-{
631
- unsigned int i, j;
632
- __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
633
-
634
- /* Clear data ptr. */
635
- vq->desc_state[head].data = NULL;
636
-
637
- /* Put back on free list: unmap first-level descriptors and find end */
638
- i = head;
639
-
640
- while (vq->vring.desc[i].flags & nextflag) {
641
- vring_unmap_one(vq, &vq->vring.desc[i]);
642
- i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
643
- vq->vq.num_free++;
644
- }
645
-
646
- vring_unmap_one(vq, &vq->vring.desc[i]);
647
- vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
648
- vq->free_head = head;
649
-
650
- /* Plus final descriptor */
651
- vq->vq.num_free++;
652
-
653
- if (vq->indirect) {
654
- struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
655
- u32 len;
656
-
657
- /* Free the indirect table, if any, now that it's unmapped. */
658
- if (!indir_desc)
659
- return;
660
-
661
- len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
662
-
663
- BUG_ON(!(vq->vring.desc[head].flags &
664
- cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
665
- BUG_ON(len == 0 || len % sizeof(struct vring_desc));
666
-
667
- for (j = 0; j < len / sizeof(struct vring_desc); j++)
668
- vring_unmap_one(vq, &indir_desc[j]);
669
-
670
- kfree(indir_desc);
671
- vq->desc_state[head].indir_desc = NULL;
672
- } else if (ctx) {
673
- *ctx = vq->desc_state[head].indir_desc;
674
- }
675
-}
676
-
677
-static inline bool more_used(const struct vring_virtqueue *vq)
678
-{
679
- return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
680
-}
681
-
6821887 /**
6831888 * virtqueue_get_buf - get the next used buffer
684
- * @vq: the struct virtqueue we're talking about.
1889
+ * @_vq: the struct virtqueue we're talking about.
6851890 * @len: the length written into the buffer
1891
+ * @ctx: extra context for the token
6861892 *
6871893 * If the device wrote data into the buffer, @len will be set to the
6881894 * amount written. This means you don't need to clear the buffer
@@ -699,57 +1905,9 @@
6991905 void **ctx)
7001906 {
7011907 struct vring_virtqueue *vq = to_vvq(_vq);
702
- void *ret;
703
- unsigned int i;
704
- u16 last_used;
7051908
706
- START_USE(vq);
707
-
708
- if (unlikely(vq->broken)) {
709
- END_USE(vq);
710
- return NULL;
711
- }
712
-
713
- if (!more_used(vq)) {
714
- pr_debug("No more buffers in queue\n");
715
- END_USE(vq);
716
- return NULL;
717
- }
718
-
719
- /* Only get used array entries after they have been exposed by host. */
720
- virtio_rmb(vq->weak_barriers);
721
-
722
- last_used = (vq->last_used_idx & (vq->vring.num - 1));
723
- i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
724
- *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
725
-
726
- if (unlikely(i >= vq->vring.num)) {
727
- BAD_RING(vq, "id %u out of range\n", i);
728
- return NULL;
729
- }
730
- if (unlikely(!vq->desc_state[i].data)) {
731
- BAD_RING(vq, "id %u is not a head!\n", i);
732
- return NULL;
733
- }
734
-
735
- /* detach_buf clears data, so grab it now. */
736
- ret = vq->desc_state[i].data;
737
- detach_buf(vq, i, ctx);
738
- vq->last_used_idx++;
739
- /* If we expect an interrupt for the next entry, tell host
740
- * by writing event index and flush out the write before
741
- * the read in the next get_buf call. */
742
- if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
743
- virtio_store_mb(vq->weak_barriers,
744
- &vring_used_event(&vq->vring),
745
- cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
746
-
747
-#ifdef DEBUG
748
- vq->last_add_time_valid = false;
749
-#endif
750
-
751
- END_USE(vq);
752
- return ret;
1909
+ return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1910
+ virtqueue_get_buf_ctx_split(_vq, len, ctx);
7531911 }
7541912 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
7551913
@@ -760,7 +1918,7 @@
7601918 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
7611919 /**
7621920 * virtqueue_disable_cb - disable callbacks
763
- * @vq: the struct virtqueue we're talking about.
1921
+ * @_vq: the struct virtqueue we're talking about.
7641922 *
7651923 * Note that this is not necessarily synchronous, hence unreliable and only
7661924 * useful as an optimization.
@@ -771,18 +1929,16 @@
7711929 {
7721930 struct vring_virtqueue *vq = to_vvq(_vq);
7731931
774
- if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
775
- vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
776
- if (!vq->event)
777
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
778
- }
779
-
1932
+ if (vq->packed_ring)
1933
+ virtqueue_disable_cb_packed(_vq);
1934
+ else
1935
+ virtqueue_disable_cb_split(_vq);
7801936 }
7811937 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
7821938
7831939 /**
7841940 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
785
- * @vq: the struct virtqueue we're talking about.
1941
+ * @_vq: the struct virtqueue we're talking about.
7861942 *
7871943 * This re-enables callbacks; it returns current queue state
7881944 * in an opaque unsigned value. This value should be later tested by
....@@ -795,29 +1951,15 @@
7951951 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
7961952 {
7971953 struct vring_virtqueue *vq = to_vvq(_vq);
798
- u16 last_used_idx;
7991954
800
- START_USE(vq);
801
-
802
- /* We optimistically turn back on interrupts, then check if there was
803
- * more to do. */
804
- /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
805
- * either clear the flags bit or point the event index at the next
806
- * entry. Always do both to keep code simple. */
807
- if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
808
- vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
809
- if (!vq->event)
810
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
811
- }
812
- vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
813
- END_USE(vq);
814
- return last_used_idx;
1955
+ return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
1956
+ virtqueue_enable_cb_prepare_split(_vq);
8151957 }
8161958 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
8171959
8181960 /**
8191961 * virtqueue_poll - query pending used buffers
820
- * @vq: the struct virtqueue we're talking about.
1962
+ * @_vq: the struct virtqueue we're talking about.
8211963 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
8221964 *
8231965 * Returns "true" if there are pending used buffers in the queue.
....@@ -832,13 +1974,14 @@
8321974 return false;
8331975
8341976 virtio_mb(vq->weak_barriers);
835
- return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
1977
+ return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
1978
+ virtqueue_poll_split(_vq, last_used_idx);
8361979 }
8371980 EXPORT_SYMBOL_GPL(virtqueue_poll);
8381981
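/*
 * Editorial aside, not part of this patch: the disable/prepare/poll sequence
 * described above, as a driver's deferred-work handler might use it.  The
 * opaque value from virtqueue_enable_cb_prepare() is only ever handed back to
 * virtqueue_poll(); both wrappers dispatch to the split or packed variant.
 * example_drain() is hypothetical (e.g. the get_buf loop sketched earlier).
 */
static void example_drain(struct virtqueue *vq);	/* hypothetical */

static void example_poll_handler(struct virtqueue *vq)
{
	unsigned int opaque;

	do {
		virtqueue_disable_cb(vq);	/* best effort, may still fire */
		example_drain(vq);
		opaque = virtqueue_enable_cb_prepare(vq);
		/* Re-check: buffers may have been used while callbacks were off. */
	} while (virtqueue_poll(vq, opaque));
}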
8391982 /**
8401983 * virtqueue_enable_cb - restart callbacks after disable_cb.
841
- * @vq: the struct virtqueue we're talking about.
1984
+ * @_vq: the struct virtqueue we're talking about.
8421985 *
8431986 * This re-enables callbacks; it returns "false" if there are pending
8441987 * buffers in the queue, to detect a possible race between the driver
....@@ -850,13 +1993,14 @@
8501993 bool virtqueue_enable_cb(struct virtqueue *_vq)
8511994 {
8521995 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
1996
+
8531997 return !virtqueue_poll(_vq, last_used_idx);
8541998 }
8551999 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
8562000
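/*
 * Editorial aside, not part of this patch: virtqueue_enable_cb() folds the
 * prepare/poll pair above into one call, giving the common callback idiom
 * sketched below.  example_recv_one() is hypothetical.
 */
static void example_recv_one(void *buf, unsigned int len);	/* hypothetical */

static void example_recv_done(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	do {
		virtqueue_disable_cb(vq);
		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
			example_recv_one(buf, len);
		/* enable_cb returns false if more buffers were used meanwhile. */
	} while (!virtqueue_enable_cb(vq));
}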
8572001 /**
8582002 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
859
- * @vq: the struct virtqueue we're talking about.
2003
+ * @_vq: the struct virtqueue we're talking about.
8602004 *
8612005 * This re-enables callbacks but hints to the other side to delay
8622006 * interrupts until most of the available buffers have been processed;
....@@ -870,40 +2014,15 @@
8702014 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
8712015 {
8722016 struct vring_virtqueue *vq = to_vvq(_vq);
873
- u16 bufs;
8742017
875
- START_USE(vq);
876
-
877
- /* We optimistically turn back on interrupts, then check if there was
878
- * more to do. */
879
- /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
880
- * either clear the flags bit or point the event index at the next
881
- * entry. Always update the event index to keep code simple. */
882
- if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
883
- vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
884
- if (!vq->event)
885
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
886
- }
887
- /* TODO: tune this threshold */
888
- bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
889
-
890
- virtio_store_mb(vq->weak_barriers,
891
- &vring_used_event(&vq->vring),
892
- cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
893
-
894
- if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
895
- END_USE(vq);
896
- return false;
897
- }
898
-
899
- END_USE(vq);
900
- return true;
2018
+ return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2019
+ virtqueue_enable_cb_delayed_split(_vq);
9012020 }
9022021 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
9032022
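/*
 * Editorial aside, not part of this patch: virtqueue_enable_cb_delayed() is
 * typically used on transmit-completion paths so the device interrupts only
 * after most in-flight buffers have been used.  example_free_old_bufs() is a
 * hypothetical helper that drains with virtqueue_get_buf().
 */
static void example_free_old_bufs(struct virtqueue *vq);	/* hypothetical */

static void example_tx_cleanup(struct virtqueue *vq)
{
	example_free_old_bufs(vq);

	/* Re-enable with a delay hint; false means more work is already pending. */
	if (!virtqueue_enable_cb_delayed(vq)) {
		virtqueue_disable_cb(vq);
		example_free_old_bufs(vq);
	}
}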
9042023 /**
9052024 * virtqueue_detach_unused_buf - detach first unused buffer
906
- * @vq: the struct virtqueue we're talking about.
2025
+ * @_vq: the struct virtqueue we're talking about.
9072026 *
9082027 * Returns NULL or the "data" token handed to virtqueue_add_*().
9092028 * This is not valid on an active queue; it is useful only for device
....@@ -912,29 +2031,16 @@
9122031 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
9132032 {
9142033 struct vring_virtqueue *vq = to_vvq(_vq);
915
- unsigned int i;
916
- void *buf;
9172034
918
- START_USE(vq);
919
-
920
- for (i = 0; i < vq->vring.num; i++) {
921
- if (!vq->desc_state[i].data)
922
- continue;
923
- /* detach_buf clears data, so grab it now. */
924
- buf = vq->desc_state[i].data;
925
- detach_buf(vq, i, NULL);
926
- vq->avail_idx_shadow--;
927
- vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
928
- END_USE(vq);
929
- return buf;
930
- }
931
- /* That should have freed everything. */
932
- BUG_ON(vq->vq.num_free != vq->vring.num);
933
-
934
- END_USE(vq);
935
- return NULL;
2035
+ return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2036
+ virtqueue_detach_unused_buf_split(_vq);
9362037 }
9372038 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
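/*
 * Editorial aside, not part of this patch: on device removal, after the queue
 * has been stopped, a driver reclaims buffers that were added but never used,
 * as the comment above describes.  example_free_buf() is hypothetical.
 */
static void example_free_buf(void *buf);	/* hypothetical */

static void example_remove_cleanup(struct virtqueue *vq)
{
	void *buf;

	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
		example_free_buf(buf);
}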
2039
+
2040
+static inline bool more_used(const struct vring_virtqueue *vq)
2041
+{
2042
+ return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2043
+}
9382044
9392045 irqreturn_t vring_interrupt(int irq, void *_vq)
9402046 {
....@@ -956,6 +2062,7 @@
9562062 }
9572063 EXPORT_SYMBOL_GPL(vring_interrupt);
9582064
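/*
 * Editorial aside, not part of this patch: a transport can point its IRQ line
 * straight at vring_interrupt(), passing the virtqueue as the cookie, in the
 * spirit of the existing PCI/MMIO transports.  The IRQ number and name are
 * hypothetical; assumes <linux/interrupt.h>.
 */
static int example_request_vq_irq(unsigned int irq, struct virtqueue *vq)
{
	/* vring_interrupt() returns IRQ_NONE when no used buffers are pending. */
	return request_irq(irq, vring_interrupt, IRQF_SHARED,
			   "example-virtio", vq);
}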
2065
+/* Only available for split ring */
9592066 struct virtqueue *__vring_new_virtqueue(unsigned int index,
9602067 struct vring vring,
9612068 struct virtio_device *vdev,
....@@ -968,28 +2075,26 @@
9682075 unsigned int i;
9692076 struct vring_virtqueue *vq;
9702077
971
- vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
972
- GFP_KERNEL);
2078
+ if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2079
+ return NULL;
2080
+
2081
+ vq = kmalloc(sizeof(*vq), GFP_KERNEL);
9732082 if (!vq)
9742083 return NULL;
9752084
976
- vq->vring = vring;
2085
+ vq->packed_ring = false;
9772086 vq->vq.callback = callback;
9782087 vq->vq.vdev = vdev;
9792088 vq->vq.name = name;
9802089 vq->vq.num_free = vring.num;
9812090 vq->vq.index = index;
9822091 vq->we_own_ring = false;
983
- vq->queue_dma_addr = 0;
984
- vq->queue_size_in_bytes = 0;
9852092 vq->notify = notify;
9862093 vq->weak_barriers = weak_barriers;
9872094 vq->broken = false;
9882095 vq->last_used_idx = 0;
989
- vq->avail_flags_shadow = 0;
990
- vq->avail_idx_shadow = 0;
9912096 vq->num_added = 0;
992
- list_add_tail(&vq->vq.list, &vdev->vqs);
2097
+ vq->use_dma_api = vring_use_dma_api(vdev);
9932098 #ifdef DEBUG
9942099 vq->in_use = false;
9952100 vq->last_add_time_valid = false;
....@@ -999,64 +2104,42 @@
9992104 !context;
10002105 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
10012106
2107
+ if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2108
+ vq->weak_barriers = false;
2109
+
2110
+ vq->split.queue_dma_addr = 0;
2111
+ vq->split.queue_size_in_bytes = 0;
2112
+
2113
+ vq->split.vring = vring;
2114
+ vq->split.avail_flags_shadow = 0;
2115
+ vq->split.avail_idx_shadow = 0;
2116
+
10022117 /* No callback? Tell other side not to bother us. */
10032118 if (!callback) {
1004
- vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2119
+ vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
10052120 if (!vq->event)
1006
- vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
2121
+ vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2122
+ vq->split.avail_flags_shadow);
2123
+ }
2124
+
2125
+ vq->split.desc_state = kmalloc_array(vring.num,
2126
+ sizeof(struct vring_desc_state_split), GFP_KERNEL);
2127
+ if (!vq->split.desc_state) {
2128
+ kfree(vq);
2129
+ return NULL;
10072130 }
10082131
10092132 /* Put everything in free lists. */
10102133 vq->free_head = 0;
10112134 for (i = 0; i < vring.num-1; i++)
1012
- vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
1013
- memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
2135
+ vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
2136
+ memset(vq->split.desc_state, 0, vring.num *
2137
+ sizeof(struct vring_desc_state_split));
10142138
2139
+ list_add_tail(&vq->vq.list, &vdev->vqs);
10152140 return &vq->vq;
10162141 }
10172142 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
1018
-
1019
-static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
1020
- dma_addr_t *dma_handle, gfp_t flag)
1021
-{
1022
- if (vring_use_dma_api(vdev)) {
1023
- return dma_alloc_coherent(vdev->dev.parent, size,
1024
- dma_handle, flag);
1025
- } else {
1026
- void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
1027
- if (queue) {
1028
- phys_addr_t phys_addr = virt_to_phys(queue);
1029
- *dma_handle = (dma_addr_t)phys_addr;
1030
-
1031
- /*
1032
-                               * Sanity check: make sure we didn't truncate
1033
- * the address. The only arches I can find that
1034
- * have 64-bit phys_addr_t but 32-bit dma_addr_t
1035
- * are certain non-highmem MIPS and x86
1036
- * configurations, but these configurations
1037
- * should never allocate physical pages above 32
1038
- * bits, so this is fine. Just in case, throw a
1039
- * warning and abort if we end up with an
1040
- * unrepresentable address.
1041
- */
1042
- if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
1043
- free_pages_exact(queue, PAGE_ALIGN(size));
1044
- return NULL;
1045
- }
1046
- }
1047
- return queue;
1048
- }
1049
-}
1050
-
1051
-static void vring_free_queue(struct virtio_device *vdev, size_t size,
1052
- void *queue, dma_addr_t dma_handle)
1053
-{
1054
- if (vring_use_dma_api(vdev)) {
1055
- dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
1056
- } else {
1057
- free_pages_exact(queue, PAGE_ALIGN(size));
1058
- }
1059
-}
10602143
10612144 struct virtqueue *vring_create_virtqueue(
10622145 unsigned int index,
....@@ -1070,59 +2153,19 @@
10702153 void (*callback)(struct virtqueue *),
10712154 const char *name)
10722155 {
1073
- struct virtqueue *vq;
1074
- void *queue = NULL;
1075
- dma_addr_t dma_addr;
1076
- size_t queue_size_in_bytes;
1077
- struct vring vring;
10782156
1079
- /* We assume num is a power of 2. */
1080
- if (num & (num - 1)) {
1081
- dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1082
- return NULL;
1083
- }
2157
+ if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2158
+ return vring_create_virtqueue_packed(index, num, vring_align,
2159
+ vdev, weak_barriers, may_reduce_num,
2160
+ context, notify, callback, name);
10842161
1085
- /* TODO: allocate each queue chunk individually */
1086
- for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1087
- queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1088
- &dma_addr,
1089
- GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1090
- if (queue)
1091
- break;
1092
- if (!may_reduce_num)
1093
- return NULL;
1094
- }
1095
-
1096
- if (!num)
1097
- return NULL;
1098
-
1099
- if (!queue) {
1100
- /* Try to get a single page. You are my only hope! */
1101
- queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1102
- &dma_addr, GFP_KERNEL|__GFP_ZERO);
1103
- }
1104
- if (!queue)
1105
- return NULL;
1106
-
1107
- queue_size_in_bytes = vring_size(num, vring_align);
1108
- vring_init(&vring, num, queue, vring_align);
1109
-
1110
- vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
1111
- notify, callback, name);
1112
- if (!vq) {
1113
- vring_free_queue(vdev, queue_size_in_bytes, queue,
1114
- dma_addr);
1115
- return NULL;
1116
- }
1117
-
1118
- to_vvq(vq)->queue_dma_addr = dma_addr;
1119
- to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes;
1120
- to_vvq(vq)->we_own_ring = true;
1121
-
1122
- return vq;
2162
+ return vring_create_virtqueue_split(index, num, vring_align,
2163
+ vdev, weak_barriers, may_reduce_num,
2164
+ context, notify, callback, name);
11232165 }
11242166 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
11252167
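/*
 * Editorial aside, not part of this patch: with this change a transport keeps
 * calling vring_create_virtqueue() and the packed/split choice is made here,
 * based on VIRTIO_F_RING_PACKED.  The queue size, alignment, callbacks and
 * name below are hypothetical placeholders.
 */
static struct virtqueue *example_create_vq(struct virtio_device *vdev,
					   unsigned int index,
					   bool (*notify)(struct virtqueue *),
					   void (*callback)(struct virtqueue *))
{
	return vring_create_virtqueue(index, 256, SMP_CACHE_BYTES, vdev,
				      true,	/* weak_barriers */
				      true,	/* may_reduce_num */
				      false,	/* context */
				      notify, callback, "example-vq");
}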
2168
+/* Only available for split ring */
11262169 struct virtqueue *vring_new_virtqueue(unsigned int index,
11272170 unsigned int num,
11282171 unsigned int vring_align,
....@@ -1135,6 +2178,10 @@
11352178 const char *name)
11362179 {
11372180 struct vring vring;
2181
+
2182
+ if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2183
+ return NULL;
2184
+
11382185 vring_init(&vring, num, pages, vring_align);
11392186 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
11402187 notify, callback, name);
....@@ -1146,9 +2193,33 @@
11462193 struct vring_virtqueue *vq = to_vvq(_vq);
11472194
11482195 if (vq->we_own_ring) {
1149
- vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
1150
- vq->vring.desc, vq->queue_dma_addr);
2196
+ if (vq->packed_ring) {
2197
+ vring_free_queue(vq->vq.vdev,
2198
+ vq->packed.ring_size_in_bytes,
2199
+ vq->packed.vring.desc,
2200
+ vq->packed.ring_dma_addr);
2201
+
2202
+ vring_free_queue(vq->vq.vdev,
2203
+ vq->packed.event_size_in_bytes,
2204
+ vq->packed.vring.driver,
2205
+ vq->packed.driver_event_dma_addr);
2206
+
2207
+ vring_free_queue(vq->vq.vdev,
2208
+ vq->packed.event_size_in_bytes,
2209
+ vq->packed.vring.device,
2210
+ vq->packed.device_event_dma_addr);
2211
+
2212
+ kfree(vq->packed.desc_state);
2213
+ kfree(vq->packed.desc_extra);
2214
+ } else {
2215
+ vring_free_queue(vq->vq.vdev,
2216
+ vq->split.queue_size_in_bytes,
2217
+ vq->split.vring.desc,
2218
+ vq->split.queue_dma_addr);
2219
+ }
11512220 }
2221
+ if (!vq->packed_ring)
2222
+ kfree(vq->split.desc_state);
11522223 list_del(&_vq->list);
11532224 kfree(vq);
11542225 }
....@@ -1167,7 +2238,11 @@
11672238 break;
11682239 case VIRTIO_F_VERSION_1:
11692240 break;
1170
- case VIRTIO_F_IOMMU_PLATFORM:
2241
+ case VIRTIO_F_ACCESS_PLATFORM:
2242
+ break;
2243
+ case VIRTIO_F_RING_PACKED:
2244
+ break;
2245
+ case VIRTIO_F_ORDER_PLATFORM:
11712246 break;
11722247 default:
11732248 /* We don't understand this bit. */
....@@ -1179,7 +2254,7 @@
11792254
11802255 /**
11812256 * virtqueue_get_vring_size - return the size of the virtqueue's vring
1182
- * @vq: the struct virtqueue containing the vring of interest.
2257
+ * @_vq: the struct virtqueue containing the vring of interest.
11832258 *
11842259 * Returns the size of the vring. This is mainly used for boasting to
11852260 * userspace. Unlike other operations, this need not be serialized.
....@@ -1189,7 +2264,7 @@
11892264
11902265 struct vring_virtqueue *vq = to_vvq(_vq);
11912266
1192
- return vq->vring.num;
2267
+ return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
11932268 }
11942269 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
11952270
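/*
 * Editorial aside, not part of this patch: drivers commonly size per-queue
 * bookkeeping from the ring size; the accessor now picks the packed or split
 * field itself.  struct example_dev is hypothetical.
 */
struct example_dev {
	unsigned int queue_depth;	/* hypothetical driver state */
};

static void example_set_queue_depth(struct example_dev *d, struct virtqueue *vq)
{
	d->queue_depth = virtqueue_get_vring_size(vq);
}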
....@@ -1224,7 +2299,10 @@
12242299
12252300 BUG_ON(!vq->we_own_ring);
12262301
1227
- return vq->queue_dma_addr;
2302
+ if (vq->packed_ring)
2303
+ return vq->packed.ring_dma_addr;
2304
+
2305
+ return vq->split.queue_dma_addr;
12282306 }
12292307 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
12302308
....@@ -1234,8 +2312,11 @@
12342312
12352313 BUG_ON(!vq->we_own_ring);
12362314
1237
- return vq->queue_dma_addr +
1238
- ((char *)vq->vring.avail - (char *)vq->vring.desc);
2315
+ if (vq->packed_ring)
2316
+ return vq->packed.driver_event_dma_addr;
2317
+
2318
+ return vq->split.queue_dma_addr +
2319
+ ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
12392320 }
12402321 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
12412322
....@@ -1245,14 +2326,18 @@
12452326
12462327 BUG_ON(!vq->we_own_ring);
12472328
1248
- return vq->queue_dma_addr +
1249
- ((char *)vq->vring.used - (char *)vq->vring.desc);
2329
+ if (vq->packed_ring)
2330
+ return vq->packed.device_event_dma_addr;
2331
+
2332
+ return vq->split.queue_dma_addr +
2333
+ ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
12502334 }
12512335 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
12522336
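/*
 * Editorial aside, not part of this patch: a transport that owns the ring
 * programs these DMA addresses into device registers; for a packed ring the
 * avail/used accessors return the driver and device event areas instead.
 * example_writeq() and the register offsets are hypothetical.
 */
static void example_writeq(void __iomem *addr, u64 val);	/* hypothetical */

static void example_program_queue(void __iomem *base, struct virtqueue *vq)
{
	example_writeq(base + 0x20, virtqueue_get_desc_addr(vq));
	example_writeq(base + 0x28, virtqueue_get_avail_addr(vq));
	example_writeq(base + 0x30, virtqueue_get_used_addr(vq));
}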
2337
+/* Only available for split ring */
12532338 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
12542339 {
1255
- return &to_vvq(vq)->vring;
2340
+ return &to_vvq(vq)->split.vring;
12562341 }
12572342 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
12582343