hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/virtio/virtio_balloon.c
....@@ -1,22 +1,9 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Virtio balloon implementation, inspired by Dor Laor and Marcelo
34 * Tosatti's implementations.
45 *
56 * Copyright 2008 Rusty Russell IBM Corporation
6
- *
7
- * This program is free software; you can redistribute it and/or modify
8
- * it under the terms of the GNU General Public License as published by
9
- * the Free Software Foundation; either version 2 of the License, or
10
- * (at your option) any later version.
11
- *
12
- * This program is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- * GNU General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU General Public License
18
- * along with this program; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
207 */
218
229 #include <linux/virtio.h>
....@@ -27,10 +14,13 @@
2714 #include <linux/slab.h>
2815 #include <linux/module.h>
2916 #include <linux/balloon_compaction.h>
17
+#include <linux/oom.h>
3018 #include <linux/wait.h>
3119 #include <linux/mm.h>
3220 #include <linux/mount.h>
3321 #include <linux/magic.h>
22
+#include <linux/pseudo_fs.h>
23
+#include <linux/page_reporting.h>
3424
3525 /*
3626 * Balloon device works in 4K page units. So each page is pointed to by
....@@ -39,15 +29,44 @@
3929 */
4030 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
4131 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
42
-#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
32
+/* Maximum number of (4k) pages to deflate on OOM notifications. */
33
+#define VIRTIO_BALLOON_OOM_NR_PAGES 256
34
+#define VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY 80
35
+
36
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
37
+ __GFP_NOMEMALLOC)
38
+/* The order of free page blocks to report to host */
39
+#define VIRTIO_BALLOON_HINT_BLOCK_ORDER (MAX_ORDER - 1)
40
+/* The size of a free page block in bytes */
41
+#define VIRTIO_BALLOON_HINT_BLOCK_BYTES \
42
+ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
43
+#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
4344
4445 #ifdef CONFIG_BALLOON_COMPACTION
4546 static struct vfsmount *balloon_mnt;
4647 #endif
4748
/*
 * Slot numbers for the vqs[], callbacks[] and names[] arrays that init_vqs()
 * passes to find_vqs(). VIRTIO_BALLOON_VQ_MAX is the length of those arrays.
 */
enum virtio_balloon_vq {
	VIRTIO_BALLOON_VQ_INFLATE = 0,	/* "inflate", always requested */
	VIRTIO_BALLOON_VQ_DEFLATE,	/* "deflate", always requested */
	VIRTIO_BALLOON_VQ_STATS,	/* needs VIRTIO_BALLOON_F_STATS_VQ */
	VIRTIO_BALLOON_VQ_FREE_PAGE,	/* needs VIRTIO_BALLOON_F_FREE_PAGE_HINT */
	VIRTIO_BALLOON_VQ_REPORTING,	/* needs VIRTIO_BALLOON_F_REPORTING */
	VIRTIO_BALLOON_VQ_MAX
};
57
+
/* Bit numbers used with virtio_balloon.config_read_bitmap. */
enum virtio_balloon_config_read {
	/* Set when free_page_hint_cmd_id has to be re-read from config space. */
	VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
};
61
+
4862 struct virtio_balloon {
4963 struct virtio_device *vdev;
50
- struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
64
+ struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
65
+
66
+ /* Balloon's own wq for cpu-intensive work items */
67
+ struct workqueue_struct *balloon_wq;
68
+ /* The free page reporting work item submitted to the balloon wq */
69
+ struct work_struct report_free_page_work;
5170
5271 /* The balloon servicing is delegated to a freezable workqueue. */
5372 struct work_struct update_balloon_stats_work;
....@@ -56,6 +75,24 @@
5675 /* Prevent updating balloon when it is being canceled. */
5776 spinlock_t stop_update_lock;
5877 bool stop_update;
78
+ /* Bitmap to indicate if reading the related config fields are needed */
79
+ unsigned long config_read_bitmap;
80
+
81
+ /* The list of allocated free pages, waiting to be given back to mm */
82
+ struct list_head free_page_list;
83
+ spinlock_t free_page_list_lock;
84
+ /* The number of free page blocks on the above list */
85
+ unsigned long num_free_page_blocks;
86
+ /*
87
+ * The cmd id received from host.
88
+ * Read it via virtio_balloon_cmd_id_received to get the latest value
89
+ * sent from host.
90
+ */
91
+ u32 cmd_id_received_cache;
92
+ /* The cmd id that is actively in use */
93
+ __virtio32 cmd_id_active;
94
+ /* Buffer to store the stop sign */
95
+ __virtio32 cmd_id_stop;
5996
6097 /* Waiting for host to ack the pages we released. */
6198 wait_queue_head_t acked;
....@@ -80,11 +117,18 @@
80117 /* Memory statistics */
81118 struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
82119
83
- /* To register a shrinker to shrink memory upon memory pressure */
120
+ /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */
84121 struct shrinker shrinker;
122
+
123
+ /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */
124
+ struct notifier_block oom_nb;
125
+
126
+ /* Free page reporting device */
127
+ struct virtqueue *reporting_vq;
128
+ struct page_reporting_dev_info pr_dev_info;
85129 };
86130
87
-static struct virtio_device_id id_table[] = {
131
+static const struct virtio_device_id id_table[] = {
88132 { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
89133 { 0 },
90134 };
....@@ -119,6 +163,33 @@
119163 /* When host has read buffer, this completes via balloon_ack */
120164 wait_event(vb->acked, virtqueue_get_buf(vq, &len));
121165
166
+}
167
+
168
+static int virtballoon_free_page_report(struct page_reporting_dev_info *pr_dev_info,
169
+ struct scatterlist *sg, unsigned int nents)
170
+{
171
+ struct virtio_balloon *vb =
172
+ container_of(pr_dev_info, struct virtio_balloon, pr_dev_info);
173
+ struct virtqueue *vq = vb->reporting_vq;
174
+ unsigned int unused, err;
175
+
176
+ /* We should always be able to add these buffers to an empty queue. */
177
+ err = virtqueue_add_inbuf(vq, sg, nents, vb, GFP_NOWAIT | __GFP_NOWARN);
178
+
179
+ /*
180
+ * In the extremely unlikely case that something has occurred and we
181
+ * are able to trigger an error we will simply display a warning
182
+ * and exit without actually processing the pages.
183
+ */
184
+ if (WARN_ON_ONCE(err))
185
+ return err;
186
+
187
+ virtqueue_kick(vq);
188
+
189
+ /* When host has read buffer, this completes via balloon_ack */
190
+ wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
191
+
192
+ return 0;
122193 }
123194
124195 static void set_page_pfns(struct virtio_balloon *vb,
....@@ -322,31 +393,65 @@
322393 virtqueue_kick(vq);
323394 }
324395
396
+static inline s64 towards_target(struct virtio_balloon *vb)
397
+{
398
+ s64 target;
399
+ u32 num_pages;
400
+
401
+ /* Legacy balloon config space is LE, unlike all other devices. */
402
+ virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages,
403
+ &num_pages);
404
+
405
+ target = num_pages;
406
+ return target - vb->num_pages;
407
+}
408
+
409
+/* Gives back @num_to_return blocks of free pages to mm. */
410
+static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
411
+ unsigned long num_to_return)
412
+{
413
+ struct page *page;
414
+ unsigned long num_returned;
415
+
416
+ spin_lock_irq(&vb->free_page_list_lock);
417
+ for (num_returned = 0; num_returned < num_to_return; num_returned++) {
418
+ page = balloon_page_pop(&vb->free_page_list);
419
+ if (!page)
420
+ break;
421
+ free_pages((unsigned long)page_address(page),
422
+ VIRTIO_BALLOON_HINT_BLOCK_ORDER);
423
+ }
424
+ vb->num_free_page_blocks -= num_returned;
425
+ spin_unlock_irq(&vb->free_page_list_lock);
426
+
427
+ return num_returned;
428
+}
429
+
430
+static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
431
+{
432
+ if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
433
+ return;
434
+
435
+ /* No need to queue the work if the bit was already set. */
436
+ if (test_and_set_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
437
+ &vb->config_read_bitmap))
438
+ return;
439
+
440
+ queue_work(vb->balloon_wq, &vb->report_free_page_work);
441
+}
442
+
325443 static void virtballoon_changed(struct virtio_device *vdev)
326444 {
327445 struct virtio_balloon *vb = vdev->priv;
328446 unsigned long flags;
329447
330448 spin_lock_irqsave(&vb->stop_update_lock, flags);
331
- if (!vb->stop_update)
332
- queue_work(system_freezable_wq, &vb->update_balloon_size_work);
449
+ if (!vb->stop_update) {
450
+ queue_work(system_freezable_wq,
451
+ &vb->update_balloon_size_work);
452
+ virtio_balloon_queue_free_page_work(vb);
453
+ }
333454 spin_unlock_irqrestore(&vb->stop_update_lock, flags);
334
-}
335
-
336
-static inline s64 towards_target(struct virtio_balloon *vb)
337
-{
338
- s64 target;
339
- u32 num_pages;
340
-
341
- virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
342
- &num_pages);
343
-
344
- /* Legacy balloon config space is LE, unlike all other devices. */
345
- if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
346
- num_pages = le32_to_cpu((__force __le32)num_pages);
347
-
348
- target = num_pages;
349
- return target - vb->num_pages;
350455 }
351456
352457 static void update_balloon_size(struct virtio_balloon *vb)
....@@ -354,11 +459,8 @@
354459 u32 actual = vb->num_pages;
355460
356461 /* Legacy balloon config space is LE, unlike all other devices. */
357
- if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
358
- actual = (__force u32)cpu_to_le32(actual);
359
-
360
- virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
361
- &actual);
462
+ virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, actual,
463
+ &actual);
362464 }
363465
364466 static void update_balloon_stats_func(struct work_struct *work)
....@@ -379,9 +481,12 @@
379481 update_balloon_size_work);
380482 diff = towards_target(vb);
381483
484
+ if (!diff)
485
+ return;
486
+
382487 if (diff > 0)
383488 diff -= fill_balloon(vb, diff);
384
- else if (diff < 0)
489
+ else
385490 diff += leak_balloon(vb, -diff);
386491 update_balloon_size(vb);
387492
....@@ -391,26 +496,52 @@
391496
392497 static int init_vqs(struct virtio_balloon *vb)
393498 {
394
- struct virtqueue *vqs[3];
395
- vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
396
- static const char * const names[] = { "inflate", "deflate", "stats" };
397
- int err, nvqs;
499
+ struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
500
+ vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
501
+ const char *names[VIRTIO_BALLOON_VQ_MAX];
502
+ int err;
398503
399504 /*
400
- * We expect two virtqueues: inflate and deflate, and
401
- * optionally stat.
505
+ * Inflateq and deflateq are used unconditionally. The names[]
506
+ * will be NULL if the related feature is not enabled, which will
507
+ * cause no allocation for the corresponding virtqueue in find_vqs.
402508 */
403
- nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
404
- err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
509
+ callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
510
+ names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
511
+ callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
512
+ names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
513
+ callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
514
+ names[VIRTIO_BALLOON_VQ_STATS] = NULL;
515
+ callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
516
+ names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
517
+ names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;
518
+
519
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
520
+ names[VIRTIO_BALLOON_VQ_STATS] = "stats";
521
+ callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
522
+ }
523
+
524
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
525
+ names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
526
+ callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
527
+ }
528
+
529
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
530
+ names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
531
+ callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
532
+ }
533
+
534
+ err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
535
+ vqs, callbacks, names, NULL, NULL);
405536 if (err)
406537 return err;
407538
408
- vb->inflate_vq = vqs[0];
409
- vb->deflate_vq = vqs[1];
539
+ vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
540
+ vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
410541 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
411542 struct scatterlist sg;
412543 unsigned int num_stats;
413
- vb->stats_vq = vqs[2];
544
+ vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
414545
415546 /*
416547 * Prime this virtqueue with one buffer so the hypervisor can
....@@ -428,7 +559,176 @@
428559 }
429560 virtqueue_kick(vb->stats_vq);
430561 }
562
+
563
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
564
+ vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
565
+
566
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
567
+ vb->reporting_vq = vqs[VIRTIO_BALLOON_VQ_REPORTING];
568
+
431569 return 0;
570
+}
571
+
572
+static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb)
573
+{
574
+ if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
575
+ &vb->config_read_bitmap)) {
576
+ /* Legacy balloon config space is LE, unlike all other devices. */
577
+ virtio_cread_le(vb->vdev, struct virtio_balloon_config,
578
+ free_page_hint_cmd_id,
579
+ &vb->cmd_id_received_cache);
580
+ }
581
+
582
+ return vb->cmd_id_received_cache;
583
+}
584
+
585
+static int send_cmd_id_start(struct virtio_balloon *vb)
586
+{
587
+ struct scatterlist sg;
588
+ struct virtqueue *vq = vb->free_page_vq;
589
+ int err, unused;
590
+
591
+ /* Detach all the used buffers from the vq */
592
+ while (virtqueue_get_buf(vq, &unused))
593
+ ;
594
+
595
+ vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
596
+ virtio_balloon_cmd_id_received(vb));
597
+ sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
598
+ err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
599
+ if (!err)
600
+ virtqueue_kick(vq);
601
+ return err;
602
+}
603
+
604
+static int send_cmd_id_stop(struct virtio_balloon *vb)
605
+{
606
+ struct scatterlist sg;
607
+ struct virtqueue *vq = vb->free_page_vq;
608
+ int err, unused;
609
+
610
+ /* Detach all the used buffers from the vq */
611
+ while (virtqueue_get_buf(vq, &unused))
612
+ ;
613
+
614
+ sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
615
+ err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
616
+ if (!err)
617
+ virtqueue_kick(vq);
618
+ return err;
619
+}
620
+
621
+static int get_free_page_and_send(struct virtio_balloon *vb)
622
+{
623
+ struct virtqueue *vq = vb->free_page_vq;
624
+ struct page *page;
625
+ struct scatterlist sg;
626
+ int err, unused;
627
+ void *p;
628
+
629
+ /* Detach all the used buffers from the vq */
630
+ while (virtqueue_get_buf(vq, &unused))
631
+ ;
632
+
633
+ page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
634
+ VIRTIO_BALLOON_HINT_BLOCK_ORDER);
635
+ /*
636
+ * When the allocation returns NULL, it indicates that we have got all
637
+ * the possible free pages, so return -EINTR to stop.
638
+ */
639
+ if (!page)
640
+ return -EINTR;
641
+
642
+ p = page_address(page);
643
+ sg_init_one(&sg, p, VIRTIO_BALLOON_HINT_BLOCK_BYTES);
644
+ /* There is always 1 entry reserved for the cmd id to use. */
645
+ if (vq->num_free > 1) {
646
+ err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
647
+ if (unlikely(err)) {
648
+ free_pages((unsigned long)p,
649
+ VIRTIO_BALLOON_HINT_BLOCK_ORDER);
650
+ return err;
651
+ }
652
+ virtqueue_kick(vq);
653
+ spin_lock_irq(&vb->free_page_list_lock);
654
+ balloon_page_push(&vb->free_page_list, page);
655
+ vb->num_free_page_blocks++;
656
+ spin_unlock_irq(&vb->free_page_list_lock);
657
+ } else {
658
+ /*
659
+ * The vq has no available entry to add this page block, so
660
+ * just free it.
661
+ */
662
+ free_pages((unsigned long)p, VIRTIO_BALLOON_HINT_BLOCK_ORDER);
663
+ }
664
+
665
+ return 0;
666
+}
667
+
668
+static int send_free_pages(struct virtio_balloon *vb)
669
+{
670
+ int err;
671
+ u32 cmd_id_active;
672
+
673
+ while (1) {
674
+ /*
675
+ * If a stop id or a new cmd id was just received from host,
676
+ * stop the reporting.
677
+ */
678
+ cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
679
+ if (unlikely(cmd_id_active !=
680
+ virtio_balloon_cmd_id_received(vb)))
681
+ break;
682
+
683
+ /*
684
+ * The free page blocks are allocated and sent to host one by
685
+ * one.
686
+ */
687
+ err = get_free_page_and_send(vb);
688
+ if (err == -EINTR)
689
+ break;
690
+ else if (unlikely(err))
691
+ return err;
692
+ }
693
+
694
+ return 0;
695
+}
696
+
697
+static void virtio_balloon_report_free_page(struct virtio_balloon *vb)
698
+{
699
+ int err;
700
+ struct device *dev = &vb->vdev->dev;
701
+
702
+ /* Start by sending the received cmd id to host with an outbuf. */
703
+ err = send_cmd_id_start(vb);
704
+ if (unlikely(err))
705
+ dev_err(dev, "Failed to send a start id, err = %d\n", err);
706
+
707
+ err = send_free_pages(vb);
708
+ if (unlikely(err))
709
+ dev_err(dev, "Failed to send a free page, err = %d\n", err);
710
+
711
+ /* End by sending a stop id to host with an outbuf. */
712
+ err = send_cmd_id_stop(vb);
713
+ if (unlikely(err))
714
+ dev_err(dev, "Failed to send a stop id, err = %d\n", err);
715
+}
716
+
717
+static void report_free_page_func(struct work_struct *work)
718
+{
719
+ struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
720
+ report_free_page_work);
721
+ u32 cmd_id_received;
722
+
723
+ cmd_id_received = virtio_balloon_cmd_id_received(vb);
724
+ if (cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
725
+ /* Pass ULONG_MAX to give back all the free pages */
726
+ return_free_pages_to_mm(vb, ULONG_MAX);
727
+ } else if (cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
728
+ cmd_id_received !=
729
+ virtio32_to_cpu(vb->vdev, vb->cmd_id_active)) {
730
+ virtio_balloon_report_free_page(vb);
731
+ }
432732 }
433733
434734 #ifdef CONFIG_BALLOON_COMPACTION
....@@ -506,46 +806,39 @@
506806 return MIGRATEPAGE_SUCCESS;
507807 }
508808
509
-static struct dentry *balloon_mount(struct file_system_type *fs_type,
510
- int flags, const char *dev_name, void *data)
809
+static int balloon_init_fs_context(struct fs_context *fc)
511810 {
512
- static const struct dentry_operations ops = {
513
- .d_dname = simple_dname,
514
- };
515
-
516
- return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops,
517
- BALLOON_KVM_MAGIC);
811
+ return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
518812 }
519813
520814 static struct file_system_type balloon_fs = {
521815 .name = "balloon-kvm",
522
- .mount = balloon_mount,
816
+ .init_fs_context = balloon_init_fs_context,
523817 .kill_sb = kill_anon_super,
524818 };
525819
526820 #endif /* CONFIG_BALLOON_COMPACTION */
527821
822
+static unsigned long shrink_free_pages(struct virtio_balloon *vb,
823
+ unsigned long pages_to_free)
824
+{
825
+ unsigned long blocks_to_free, blocks_freed;
826
+
827
+ pages_to_free = round_up(pages_to_free,
828
+ VIRTIO_BALLOON_HINT_BLOCK_PAGES);
829
+ blocks_to_free = pages_to_free / VIRTIO_BALLOON_HINT_BLOCK_PAGES;
830
+ blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
831
+
832
+ return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
833
+}
834
+
528835 static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
529836 struct shrink_control *sc)
530837 {
531
- unsigned long pages_to_free, pages_freed = 0;
532838 struct virtio_balloon *vb = container_of(shrinker,
533839 struct virtio_balloon, shrinker);
534840
535
- pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
536
-
537
- /*
538
- * One invocation of leak_balloon can deflate at most
539
- * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
540
- * multiple times to deflate pages till reaching pages_to_free.
541
- */
542
- while (vb->num_pages && pages_to_free) {
543
- pages_to_free -= pages_freed;
544
- pages_freed += leak_balloon(vb, pages_to_free);
545
- }
546
- update_balloon_size(vb);
547
-
548
- return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
841
+ return shrink_free_pages(vb, sc->nr_to_scan);
549842 }
550843
551844 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
....@@ -554,7 +847,21 @@
554847 struct virtio_balloon *vb = container_of(shrinker,
555848 struct virtio_balloon, shrinker);
556849
557
- return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
850
+ return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
851
+}
852
+
853
+static int virtio_balloon_oom_notify(struct notifier_block *nb,
854
+ unsigned long dummy, void *parm)
855
+{
856
+ struct virtio_balloon *vb = container_of(nb,
857
+ struct virtio_balloon, oom_nb);
858
+ unsigned long *freed = parm;
859
+
860
+ *freed += leak_balloon(vb, VIRTIO_BALLOON_OOM_NR_PAGES) /
861
+ VIRTIO_BALLOON_PAGES_PER_PAGE;
862
+ update_balloon_size(vb);
863
+
864
+ return NOTIFY_OK;
558865 }
559866
560867 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
....@@ -612,27 +919,107 @@
612919 vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
613920 if (IS_ERR(vb->vb_dev_info.inode)) {
614921 err = PTR_ERR(vb->vb_dev_info.inode);
615
- kern_unmount(balloon_mnt);
616
- goto out_del_vqs;
922
+ goto out_kern_unmount;
617923 }
618924 vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
619925 #endif
620
- /*
621
- * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
622
- * shrinker needs to be registered to relieve memory pressure.
623
- */
624
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
926
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
927
+ /*
928
+ * There is always one entry reserved for cmd id, so the ring
929
+ * size needs to be at least two to report free page hints.
930
+ */
931
+ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
932
+ err = -ENOSPC;
933
+ goto out_iput;
934
+ }
935
+ vb->balloon_wq = alloc_workqueue("balloon-wq",
936
+ WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
937
+ if (!vb->balloon_wq) {
938
+ err = -ENOMEM;
939
+ goto out_iput;
940
+ }
941
+ INIT_WORK(&vb->report_free_page_work, report_free_page_func);
942
+ vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
943
+ vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
944
+ VIRTIO_BALLOON_CMD_ID_STOP);
945
+ vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
946
+ VIRTIO_BALLOON_CMD_ID_STOP);
947
+ spin_lock_init(&vb->free_page_list_lock);
948
+ INIT_LIST_HEAD(&vb->free_page_list);
949
+ /*
950
+ * We're allowed to reuse any free pages, even if they are
951
+ * still to be processed by the host.
952
+ */
625953 err = virtio_balloon_register_shrinker(vb);
626954 if (err)
627
- goto out_del_vqs;
955
+ goto out_del_balloon_wq;
628956 }
957
+
958
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
959
+ vb->oom_nb.notifier_call = virtio_balloon_oom_notify;
960
+ vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY;
961
+ err = register_oom_notifier(&vb->oom_nb);
962
+ if (err < 0)
963
+ goto out_unregister_shrinker;
964
+ }
965
+
966
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
967
+ /* Start with poison val of 0 representing general init */
968
+ __u32 poison_val = 0;
969
+
970
+ /*
971
+ * Let the hypervisor know that we are expecting a
972
+ * specific value to be written back in balloon pages.
973
+ *
974
+ * If the PAGE_POISON value was larger than a byte we would
975
+ * need to byte swap poison_val here to guarantee it is
976
+ * little-endian. However for now it is a single byte so we
977
+ * can pass it as-is.
978
+ */
979
+ if (!want_init_on_free())
980
+ memset(&poison_val, PAGE_POISON, sizeof(poison_val));
981
+
982
+ virtio_cwrite_le(vb->vdev, struct virtio_balloon_config,
983
+ poison_val, &poison_val);
984
+ }
985
+
986
+ vb->pr_dev_info.report = virtballoon_free_page_report;
987
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
988
+ unsigned int capacity;
989
+
990
+ capacity = virtqueue_get_vring_size(vb->reporting_vq);
991
+ if (capacity < PAGE_REPORTING_CAPACITY) {
992
+ err = -ENOSPC;
993
+ goto out_unregister_oom;
994
+ }
995
+
996
+ err = page_reporting_register(&vb->pr_dev_info);
997
+ if (err)
998
+ goto out_unregister_oom;
999
+ }
1000
+
6291001 virtio_device_ready(vdev);
6301002
6311003 if (towards_target(vb))
6321004 virtballoon_changed(vdev);
6331005 return 0;
6341006
1007
+out_unregister_oom:
1008
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
1009
+ unregister_oom_notifier(&vb->oom_nb);
1010
+out_unregister_shrinker:
1011
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
1012
+ virtio_balloon_unregister_shrinker(vb);
1013
+out_del_balloon_wq:
1014
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
1015
+ destroy_workqueue(vb->balloon_wq);
1016
+out_iput:
1017
+#ifdef CONFIG_BALLOON_COMPACTION
1018
+ iput(vb->vb_dev_info.inode);
1019
+out_kern_unmount:
1020
+ kern_unmount(balloon_mnt);
6351021 out_del_vqs:
1022
+#endif
6361023 vdev->config->del_vqs(vdev);
6371024 out_free_vb:
6381025 kfree(vb);
....@@ -647,6 +1034,10 @@
6471034 leak_balloon(vb, vb->num_pages);
6481035 update_balloon_size(vb);
6491036
1037
+ /* There might be free pages that are being reported: release them. */
1038
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
1039
+ return_free_pages_to_mm(vb, ULONG_MAX);
1040
+
6501041 /* Now we reset the device so we can clean up the queues. */
6511042 vb->vdev->config->reset(vb->vdev);
6521043
....@@ -657,13 +1048,22 @@
6571048 {
6581049 struct virtio_balloon *vb = vdev->priv;
6591050
1051
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
1052
+ page_reporting_unregister(&vb->pr_dev_info);
6601053 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
1054
+ unregister_oom_notifier(&vb->oom_nb);
1055
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
6611056 virtio_balloon_unregister_shrinker(vb);
6621057 spin_lock_irq(&vb->stop_update_lock);
6631058 vb->stop_update = true;
6641059 spin_unlock_irq(&vb->stop_update_lock);
6651060 cancel_work_sync(&vb->update_balloon_size_work);
6661061 cancel_work_sync(&vb->update_balloon_stats_work);
1062
+
1063
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
1064
+ cancel_work_sync(&vb->report_free_page_work);
1065
+ destroy_workqueue(vb->balloon_wq);
1066
+ }
6671067
6681068 remove_common(vb);
6691069 #ifdef CONFIG_BALLOON_COMPACTION
....@@ -708,7 +1108,18 @@
7081108
7091109 static int virtballoon_validate(struct virtio_device *vdev)
7101110 {
711
- __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
1111
+ /*
1112
+ * Inform the hypervisor that our pages are poisoned or
1113
+ * initialized. If we cannot do that then we should disable
1114
+ * page reporting as it could potentially change the contents
1115
+ * of our free pages.
1116
+ */
1117
+ if (!want_init_on_free() && !page_poisoning_enabled_static())
1118
+ __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
1119
+ else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON))
1120
+ __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING);
1121
+
1122
+ __virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
7121123 return 0;
7131124 }
7141125
....@@ -716,6 +1127,9 @@
7161127 VIRTIO_BALLOON_F_MUST_TELL_HOST,
7171128 VIRTIO_BALLOON_F_STATS_VQ,
7181129 VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
1130
+ VIRTIO_BALLOON_F_FREE_PAGE_HINT,
1131
+ VIRTIO_BALLOON_F_PAGE_POISON,
1132
+ VIRTIO_BALLOON_F_REPORTING,
7191133 };
7201134
7211135 static struct virtio_driver virtio_balloon_driver = {