.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
---|
1 | 2 | /* |
---|
2 | 3 | * Virtio balloon implementation, inspired by Dor Laor and Marcelo |
---|
3 | 4 | * Tosatti's implementations. |
---|
4 | 5 | * |
---|
5 | 6 | * Copyright 2008 Rusty Russell IBM Corporation |
---|
6 | | - * |
---|
7 | | - * This program is free software; you can redistribute it and/or modify |
---|
8 | | - * it under the terms of the GNU General Public License as published by |
---|
9 | | - * the Free Software Foundation; either version 2 of the License, or |
---|
10 | | - * (at your option) any later version. |
---|
11 | | - * |
---|
12 | | - * This program is distributed in the hope that it will be useful, |
---|
13 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | | - * GNU General Public License for more details. |
---|
16 | | - * |
---|
17 | | - * You should have received a copy of the GNU General Public License |
---|
18 | | - * along with this program; if not, write to the Free Software |
---|
19 | | - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
---|
20 | 7 | */ |
---|
21 | 8 | |
---|
22 | 9 | #include <linux/virtio.h> |
---|
.. | .. |
---|
27 | 14 | #include <linux/slab.h> |
---|
28 | 15 | #include <linux/module.h> |
---|
29 | 16 | #include <linux/balloon_compaction.h> |
---|
| 17 | +#include <linux/oom.h> |
---|
30 | 18 | #include <linux/wait.h> |
---|
31 | 19 | #include <linux/mm.h> |
---|
32 | 20 | #include <linux/mount.h> |
---|
33 | 21 | #include <linux/magic.h> |
---|
| 22 | +#include <linux/pseudo_fs.h> |
---|
| 23 | +#include <linux/page_reporting.h> |
---|
34 | 24 | |
---|
35 | 25 | /* |
---|
36 | 26 | * Balloon device works in 4K page units. So each page is pointed to by |
---|
.. | .. |
---|
39 | 29 | */ |
---|
40 | 30 | #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) |
---|
41 | 31 | #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 |
---|
42 | | -#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 |
---|
| 32 | +/* Maximum number of (4k) pages to deflate on OOM notifications. */ |
---|
| 33 | +#define VIRTIO_BALLOON_OOM_NR_PAGES 256 |
---|
| 34 | +#define VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY 80 |
---|
| 35 | + |
---|
| 36 | +#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ |
---|
| 37 | + __GFP_NOMEMALLOC) |
---|
| 38 | +/* The order of free page blocks to report to host */ |
---|
| 39 | +#define VIRTIO_BALLOON_HINT_BLOCK_ORDER (MAX_ORDER - 1) |
---|
| 40 | +/* The size of a free page block in bytes */ |
---|
| 41 | +#define VIRTIO_BALLOON_HINT_BLOCK_BYTES \ |
---|
| 42 | + (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) |
---|
| 43 | +#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER) |
---|
43 | 44 | |
---|
44 | 45 | #ifdef CONFIG_BALLOON_COMPACTION |
---|
45 | 46 | static struct vfsmount *balloon_mnt; |
---|
46 | 47 | #endif |
---|
47 | 48 | |
---|
| 49 | +enum virtio_balloon_vq { |
---|
| 50 | + VIRTIO_BALLOON_VQ_INFLATE, |
---|
| 51 | + VIRTIO_BALLOON_VQ_DEFLATE, |
---|
| 52 | + VIRTIO_BALLOON_VQ_STATS, |
---|
| 53 | + VIRTIO_BALLOON_VQ_FREE_PAGE, |
---|
| 54 | + VIRTIO_BALLOON_VQ_REPORTING, |
---|
| 55 | + VIRTIO_BALLOON_VQ_MAX |
---|
| 56 | +}; |
---|
| 57 | + |
---|
| 58 | +enum virtio_balloon_config_read { |
---|
| 59 | + VIRTIO_BALLOON_CONFIG_READ_CMD_ID = 0, |
---|
| 60 | +}; |
---|
| 61 | + |
---|
48 | 62 | struct virtio_balloon { |
---|
49 | 63 | struct virtio_device *vdev; |
---|
50 | | - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; |
---|
| 64 | + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; |
---|
| 65 | + |
---|
| 66 | + /* Balloon's own wq for cpu-intensive work items */ |
---|
| 67 | + struct workqueue_struct *balloon_wq; |
---|
| 68 | + /* The free page reporting work item submitted to the balloon wq */ |
---|
| 69 | + struct work_struct report_free_page_work; |
---|
51 | 70 | |
---|
52 | 71 | /* The balloon servicing is delegated to a freezable workqueue. */ |
---|
53 | 72 | struct work_struct update_balloon_stats_work; |
---|
.. | .. |
---|
56 | 75 | /* Prevent updating balloon when it is being canceled. */ |
---|
57 | 76 | spinlock_t stop_update_lock; |
---|
58 | 77 | bool stop_update; |
---|
| 78 | + /* Bitmap to indicate if reading the related config fields is needed */ |
---|
| 79 | + unsigned long config_read_bitmap; |
---|
| 80 | + |
---|
| 81 | + /* The list of allocated free pages, waiting to be given back to mm */ |
---|
| 82 | + struct list_head free_page_list; |
---|
| 83 | + spinlock_t free_page_list_lock; |
---|
| 84 | + /* The number of free page blocks on the above list */ |
---|
| 85 | + unsigned long num_free_page_blocks; |
---|
| 86 | + /* |
---|
| 87 | + * The cmd id received from host. |
---|
| 88 | + * Read it via virtio_balloon_cmd_id_received to get the latest value |
---|
| 89 | + * sent from host. |
---|
| 90 | + */ |
---|
| 91 | + u32 cmd_id_received_cache; |
---|
| 92 | + /* The cmd id that is actively in use */ |
---|
| 93 | + __virtio32 cmd_id_active; |
---|
| 94 | + /* Buffer holding the stop cmd id that is sent to the host */ |
---|
| 95 | + __virtio32 cmd_id_stop; |
---|
59 | 96 | |
---|
60 | 97 | /* Waiting for host to ack the pages we released. */ |
---|
61 | 98 | wait_queue_head_t acked; |
---|
.. | .. |
---|
80 | 117 | /* Memory statistics */ |
---|
81 | 118 | struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; |
---|
82 | 119 | |
---|
83 | | - /* To register a shrinker to shrink memory upon memory pressure */ |
---|
| 120 | + /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */ |
---|
84 | 121 | struct shrinker shrinker; |
---|
| 122 | + |
---|
| 123 | + /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */ |
---|
| 124 | + struct notifier_block oom_nb; |
---|
| 125 | + |
---|
| 126 | + /* Free page reporting device */ |
---|
| 127 | + struct virtqueue *reporting_vq; |
---|
| 128 | + struct page_reporting_dev_info pr_dev_info; |
---|
85 | 129 | }; |
---|
86 | 130 | |
---|
87 | | -static struct virtio_device_id id_table[] = { |
---|
| 131 | +static const struct virtio_device_id id_table[] = { |
---|
88 | 132 | { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, |
---|
89 | 133 | { 0 }, |
---|
90 | 134 | }; |
---|
.. | .. |
---|
119 | 163 | /* When host has read buffer, this completes via balloon_ack */ |
---|
120 | 164 | wait_event(vb->acked, virtqueue_get_buf(vq, &len)); |
---|
121 | 165 | |
---|
| 166 | +} |
---|
| 167 | + |
---|
| 168 | +static int virtballoon_free_page_report(struct page_reporting_dev_info *pr_dev_info, |
---|
| 169 | + struct scatterlist *sg, unsigned int nents) |
---|
| 170 | +{ |
---|
| 171 | + struct virtio_balloon *vb = |
---|
| 172 | + container_of(pr_dev_info, struct virtio_balloon, pr_dev_info); |
---|
| 173 | + struct virtqueue *vq = vb->reporting_vq; |
---|
| 174 | + unsigned int unused, err; |
---|
| 175 | + |
---|
| 176 | + /* We should always be able to add these buffers to an empty queue. */ |
---|
| 177 | + err = virtqueue_add_inbuf(vq, sg, nents, vb, GFP_NOWAIT | __GFP_NOWARN); |
---|
| 178 | + |
---|
| 179 | + /* |
---|
| 180 | + * In the extremely unlikely case that adding the buffers fails, we |
---|
| 181 | + * simply emit a one-time warning and return the error without |
---|
| 182 | + * actually processing the pages. |
---|
| 183 | + */ |
---|
| 184 | + if (WARN_ON_ONCE(err)) |
---|
| 185 | + return err; |
---|
| 186 | + |
---|
| 187 | + virtqueue_kick(vq); |
---|
| 188 | + |
---|
| 189 | + /* When host has read buffer, this completes via balloon_ack */ |
---|
| 190 | + wait_event(vb->acked, virtqueue_get_buf(vq, &unused)); |
---|
| 191 | + |
---|
| 192 | + return 0; |
---|
122 | 193 | } |
---|
123 | 194 | |
---|
124 | 195 | static void set_page_pfns(struct virtio_balloon *vb, |
---|
.. | .. |
---|
322 | 393 | virtqueue_kick(vq); |
---|
323 | 394 | } |
---|
324 | 395 | |
---|
| 396 | +static inline s64 towards_target(struct virtio_balloon *vb) |
---|
| 397 | +{ |
---|
| 398 | + s64 target; |
---|
| 399 | + u32 num_pages; |
---|
| 400 | + |
---|
| 401 | + /* Legacy balloon config space is LE, unlike all other devices. */ |
---|
| 402 | + virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages, |
---|
| 403 | + &num_pages); |
---|
| 404 | + |
---|
| 405 | + target = num_pages; |
---|
| 406 | + return target - vb->num_pages; |
---|
| 407 | +} |
---|
| 408 | + |
---|
| 409 | +/* Gives back @num_to_return blocks of free pages to mm. */ |
---|
| 410 | +static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb, |
---|
| 411 | + unsigned long num_to_return) |
---|
| 412 | +{ |
---|
| 413 | + struct page *page; |
---|
| 414 | + unsigned long num_returned; |
---|
| 415 | + |
---|
| 416 | + spin_lock_irq(&vb->free_page_list_lock); |
---|
| 417 | + for (num_returned = 0; num_returned < num_to_return; num_returned++) { |
---|
| 418 | + page = balloon_page_pop(&vb->free_page_list); |
---|
| 419 | + if (!page) |
---|
| 420 | + break; |
---|
| 421 | + free_pages((unsigned long)page_address(page), |
---|
| 422 | + VIRTIO_BALLOON_HINT_BLOCK_ORDER); |
---|
| 423 | + } |
---|
| 424 | + vb->num_free_page_blocks -= num_returned; |
---|
| 425 | + spin_unlock_irq(&vb->free_page_list_lock); |
---|
| 426 | + |
---|
| 427 | + return num_returned; |
---|
| 428 | +} |
---|
| 429 | + |
---|
| 430 | +static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) |
---|
| 431 | +{ |
---|
| 432 | + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) |
---|
| 433 | + return; |
---|
| 434 | + |
---|
| 435 | + /* No need to queue the work if the bit was already set. */ |
---|
| 436 | + if (test_and_set_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, |
---|
| 437 | + &vb->config_read_bitmap)) |
---|
| 438 | + return; |
---|
| 439 | + |
---|
| 440 | + queue_work(vb->balloon_wq, &vb->report_free_page_work); |
---|
| 441 | +} |
---|
| 442 | + |
---|
325 | 443 | static void virtballoon_changed(struct virtio_device *vdev) |
---|
326 | 444 | { |
---|
327 | 445 | struct virtio_balloon *vb = vdev->priv; |
---|
328 | 446 | unsigned long flags; |
---|
329 | 447 | |
---|
330 | 448 | spin_lock_irqsave(&vb->stop_update_lock, flags); |
---|
331 | | - if (!vb->stop_update) |
---|
332 | | - queue_work(system_freezable_wq, &vb->update_balloon_size_work); |
---|
| 449 | + if (!vb->stop_update) { |
---|
| 450 | + queue_work(system_freezable_wq, |
---|
| 451 | + &vb->update_balloon_size_work); |
---|
| 452 | + virtio_balloon_queue_free_page_work(vb); |
---|
| 453 | + } |
---|
333 | 454 | spin_unlock_irqrestore(&vb->stop_update_lock, flags); |
---|
334 | | -} |
---|
335 | | - |
---|
336 | | -static inline s64 towards_target(struct virtio_balloon *vb) |
---|
337 | | -{ |
---|
338 | | - s64 target; |
---|
339 | | - u32 num_pages; |
---|
340 | | - |
---|
341 | | - virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, |
---|
342 | | - &num_pages); |
---|
343 | | - |
---|
344 | | - /* Legacy balloon config space is LE, unlike all other devices. */ |
---|
345 | | - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) |
---|
346 | | - num_pages = le32_to_cpu((__force __le32)num_pages); |
---|
347 | | - |
---|
348 | | - target = num_pages; |
---|
349 | | - return target - vb->num_pages; |
---|
350 | 455 | } |
---|
351 | 456 | |
---|
352 | 457 | static void update_balloon_size(struct virtio_balloon *vb) |
---|
.. | .. |
---|
354 | 459 | u32 actual = vb->num_pages; |
---|
355 | 460 | |
---|
356 | 461 | /* Legacy balloon config space is LE, unlike all other devices. */ |
---|
357 | | - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) |
---|
358 | | - actual = (__force u32)cpu_to_le32(actual); |
---|
359 | | - |
---|
360 | | - virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, |
---|
361 | | - &actual); |
---|
| 462 | + virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, actual, |
---|
| 463 | + &actual); |
---|
362 | 464 | } |
---|
363 | 465 | |
---|
364 | 466 | static void update_balloon_stats_func(struct work_struct *work) |
---|
.. | .. |
---|
379 | 481 | update_balloon_size_work); |
---|
380 | 482 | diff = towards_target(vb); |
---|
381 | 483 | |
---|
| 484 | + if (!diff) |
---|
| 485 | + return; |
---|
| 486 | + |
---|
382 | 487 | if (diff > 0) |
---|
383 | 488 | diff -= fill_balloon(vb, diff); |
---|
384 | | - else if (diff < 0) |
---|
| 489 | + else |
---|
385 | 490 | diff += leak_balloon(vb, -diff); |
---|
386 | 491 | update_balloon_size(vb); |
---|
387 | 492 | |
---|
.. | .. |
---|
391 | 496 | |
---|
392 | 497 | static int init_vqs(struct virtio_balloon *vb) |
---|
393 | 498 | { |
---|
394 | | - struct virtqueue *vqs[3]; |
---|
395 | | - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; |
---|
396 | | - static const char * const names[] = { "inflate", "deflate", "stats" }; |
---|
397 | | - int err, nvqs; |
---|
| 499 | + struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX]; |
---|
| 500 | + vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX]; |
---|
| 501 | + const char *names[VIRTIO_BALLOON_VQ_MAX]; |
---|
| 502 | + int err; |
---|
398 | 503 | |
---|
399 | 504 | /* |
---|
400 | | - * We expect two virtqueues: inflate and deflate, and |
---|
401 | | - * optionally stat. |
---|
| 505 | + * Inflateq and deflateq are used unconditionally. The names[] |
---|
| 506 | + * will be NULL if the related feature is not enabled, which will |
---|
| 507 | + * cause no allocation for the corresponding virtqueue in find_vqs. |
---|
402 | 508 | */ |
---|
403 | | - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; |
---|
404 | | - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); |
---|
| 509 | + callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack; |
---|
| 510 | + names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; |
---|
| 511 | + callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; |
---|
| 512 | + names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; |
---|
| 513 | + callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL; |
---|
| 514 | + names[VIRTIO_BALLOON_VQ_STATS] = NULL; |
---|
| 515 | + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; |
---|
| 516 | + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; |
---|
| 517 | + names[VIRTIO_BALLOON_VQ_REPORTING] = NULL; |
---|
| 518 | + |
---|
| 519 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { |
---|
| 520 | + names[VIRTIO_BALLOON_VQ_STATS] = "stats"; |
---|
| 521 | + callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request; |
---|
| 522 | + } |
---|
| 523 | + |
---|
| 524 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { |
---|
| 525 | + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq"; |
---|
| 526 | + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; |
---|
| 527 | + } |
---|
| 528 | + |
---|
| 529 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) { |
---|
| 530 | + names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq"; |
---|
| 531 | + callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack; |
---|
| 532 | + } |
---|
| 533 | + |
---|
| 534 | + err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, |
---|
| 535 | + vqs, callbacks, names, NULL, NULL); |
---|
405 | 536 | if (err) |
---|
406 | 537 | return err; |
---|
407 | 538 | |
---|
408 | | - vb->inflate_vq = vqs[0]; |
---|
409 | | - vb->deflate_vq = vqs[1]; |
---|
| 539 | + vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE]; |
---|
| 540 | + vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE]; |
---|
410 | 541 | if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { |
---|
411 | 542 | struct scatterlist sg; |
---|
412 | 543 | unsigned int num_stats; |
---|
413 | | - vb->stats_vq = vqs[2]; |
---|
| 544 | + vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS]; |
---|
414 | 545 | |
---|
415 | 546 | /* |
---|
416 | 547 | * Prime this virtqueue with one buffer so the hypervisor can |
---|
.. | .. |
---|
428 | 559 | } |
---|
429 | 560 | virtqueue_kick(vb->stats_vq); |
---|
430 | 561 | } |
---|
| 562 | + |
---|
| 563 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) |
---|
| 564 | + vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE]; |
---|
| 565 | + |
---|
| 566 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) |
---|
| 567 | + vb->reporting_vq = vqs[VIRTIO_BALLOON_VQ_REPORTING]; |
---|
| 568 | + |
---|
431 | 569 | return 0; |
---|
| 570 | +} |
---|
| 571 | + |
---|
| 572 | +static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb) |
---|
| 573 | +{ |
---|
| 574 | + if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, |
---|
| 575 | + &vb->config_read_bitmap)) { |
---|
| 576 | + /* Legacy balloon config space is LE, unlike all other devices. */ |
---|
| 577 | + virtio_cread_le(vb->vdev, struct virtio_balloon_config, |
---|
| 578 | + free_page_hint_cmd_id, |
---|
| 579 | + &vb->cmd_id_received_cache); |
---|
| 580 | + } |
---|
| 581 | + |
---|
| 582 | + return vb->cmd_id_received_cache; |
---|
| 583 | +} |
---|
| 584 | + |
---|
| 585 | +static int send_cmd_id_start(struct virtio_balloon *vb) |
---|
| 586 | +{ |
---|
| 587 | + struct scatterlist sg; |
---|
| 588 | + struct virtqueue *vq = vb->free_page_vq; |
---|
| 589 | + int err, unused; |
---|
| 590 | + |
---|
| 591 | + /* Detach all the used buffers from the vq */ |
---|
| 592 | + while (virtqueue_get_buf(vq, &unused)) |
---|
| 593 | + ; |
---|
| 594 | + |
---|
| 595 | + vb->cmd_id_active = cpu_to_virtio32(vb->vdev, |
---|
| 596 | + virtio_balloon_cmd_id_received(vb)); |
---|
| 597 | + sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active)); |
---|
| 598 | + err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL); |
---|
| 599 | + if (!err) |
---|
| 600 | + virtqueue_kick(vq); |
---|
| 601 | + return err; |
---|
| 602 | +} |
---|
| 603 | + |
---|
| 604 | +static int send_cmd_id_stop(struct virtio_balloon *vb) |
---|
| 605 | +{ |
---|
| 606 | + struct scatterlist sg; |
---|
| 607 | + struct virtqueue *vq = vb->free_page_vq; |
---|
| 608 | + int err, unused; |
---|
| 609 | + |
---|
| 610 | + /* Detach all the used buffers from the vq */ |
---|
| 611 | + while (virtqueue_get_buf(vq, &unused)) |
---|
| 612 | + ; |
---|
| 613 | + |
---|
| 614 | + sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop)); |
---|
| 615 | + err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL); |
---|
| 616 | + if (!err) |
---|
| 617 | + virtqueue_kick(vq); |
---|
| 618 | + return err; |
---|
| 619 | +} |
---|
| 620 | + |
---|
| 621 | +static int get_free_page_and_send(struct virtio_balloon *vb) |
---|
| 622 | +{ |
---|
| 623 | + struct virtqueue *vq = vb->free_page_vq; |
---|
| 624 | + struct page *page; |
---|
| 625 | + struct scatterlist sg; |
---|
| 626 | + int err, unused; |
---|
| 627 | + void *p; |
---|
| 628 | + |
---|
| 629 | + /* Detach all the used buffers from the vq */ |
---|
| 630 | + while (virtqueue_get_buf(vq, &unused)) |
---|
| 631 | + ; |
---|
| 632 | + |
---|
| 633 | + page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG, |
---|
| 634 | + VIRTIO_BALLOON_HINT_BLOCK_ORDER); |
---|
| 635 | + /* |
---|
| 636 | + * When the allocation returns NULL, it indicates that we have got all |
---|
| 637 | + * the possible free pages, so return -EINTR to stop. |
---|
| 638 | + */ |
---|
| 639 | + if (!page) |
---|
| 640 | + return -EINTR; |
---|
| 641 | + |
---|
| 642 | + p = page_address(page); |
---|
| 643 | + sg_init_one(&sg, p, VIRTIO_BALLOON_HINT_BLOCK_BYTES); |
---|
| 644 | + /* There is always 1 entry reserved for the cmd id to use. */ |
---|
| 645 | + if (vq->num_free > 1) { |
---|
| 646 | + err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL); |
---|
| 647 | + if (unlikely(err)) { |
---|
| 648 | + free_pages((unsigned long)p, |
---|
| 649 | + VIRTIO_BALLOON_HINT_BLOCK_ORDER); |
---|
| 650 | + return err; |
---|
| 651 | + } |
---|
| 652 | + virtqueue_kick(vq); |
---|
| 653 | + spin_lock_irq(&vb->free_page_list_lock); |
---|
| 654 | + balloon_page_push(&vb->free_page_list, page); |
---|
| 655 | + vb->num_free_page_blocks++; |
---|
| 656 | + spin_unlock_irq(&vb->free_page_list_lock); |
---|
| 657 | + } else { |
---|
| 658 | + /* |
---|
| 659 | + * The vq has no available entry to add this page block, so |
---|
| 660 | + * just free it. |
---|
| 661 | + */ |
---|
| 662 | + free_pages((unsigned long)p, VIRTIO_BALLOON_HINT_BLOCK_ORDER); |
---|
| 663 | + } |
---|
| 664 | + |
---|
| 665 | + return 0; |
---|
| 666 | +} |
---|
| 667 | + |
---|
| 668 | +static int send_free_pages(struct virtio_balloon *vb) |
---|
| 669 | +{ |
---|
| 670 | + int err; |
---|
| 671 | + u32 cmd_id_active; |
---|
| 672 | + |
---|
| 673 | + while (1) { |
---|
| 674 | + /* |
---|
| 675 | + * If a stop id or a new cmd id was just received from host, |
---|
| 676 | + * stop the reporting. |
---|
| 677 | + */ |
---|
| 678 | + cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active); |
---|
| 679 | + if (unlikely(cmd_id_active != |
---|
| 680 | + virtio_balloon_cmd_id_received(vb))) |
---|
| 681 | + break; |
---|
| 682 | + |
---|
| 683 | + /* |
---|
| 684 | + * The free page blocks are allocated and sent to host one by |
---|
| 685 | + * one. |
---|
| 686 | + */ |
---|
| 687 | + err = get_free_page_and_send(vb); |
---|
| 688 | + if (err == -EINTR) |
---|
| 689 | + break; |
---|
| 690 | + else if (unlikely(err)) |
---|
| 691 | + return err; |
---|
| 692 | + } |
---|
| 693 | + |
---|
| 694 | + return 0; |
---|
| 695 | +} |
---|
| 696 | + |
---|
| 697 | +static void virtio_balloon_report_free_page(struct virtio_balloon *vb) |
---|
| 698 | +{ |
---|
| 699 | + int err; |
---|
| 700 | + struct device *dev = &vb->vdev->dev; |
---|
| 701 | + |
---|
| 702 | + /* Start by sending the received cmd id to host with an outbuf. */ |
---|
| 703 | + err = send_cmd_id_start(vb); |
---|
| 704 | + if (unlikely(err)) |
---|
| 705 | + dev_err(dev, "Failed to send a start id, err = %d\n", err); |
---|
| 706 | + |
---|
| 707 | + err = send_free_pages(vb); |
---|
| 708 | + if (unlikely(err)) |
---|
| 709 | + dev_err(dev, "Failed to send a free page, err = %d\n", err); |
---|
| 710 | + |
---|
| 711 | + /* End by sending a stop id to host with an outbuf. */ |
---|
| 712 | + err = send_cmd_id_stop(vb); |
---|
| 713 | + if (unlikely(err)) |
---|
| 714 | + dev_err(dev, "Failed to send a stop id, err = %d\n", err); |
---|
| 715 | +} |
---|
| 716 | + |
---|
| 717 | +static void report_free_page_func(struct work_struct *work) |
---|
| 718 | +{ |
---|
| 719 | + struct virtio_balloon *vb = container_of(work, struct virtio_balloon, |
---|
| 720 | + report_free_page_work); |
---|
| 721 | + u32 cmd_id_received; |
---|
| 722 | + |
---|
| 723 | + cmd_id_received = virtio_balloon_cmd_id_received(vb); |
---|
| 724 | + if (cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) { |
---|
| 725 | + /* Pass ULONG_MAX to give back all the free pages */ |
---|
| 726 | + return_free_pages_to_mm(vb, ULONG_MAX); |
---|
| 727 | + } else if (cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP && |
---|
| 728 | + cmd_id_received != |
---|
| 729 | + virtio32_to_cpu(vb->vdev, vb->cmd_id_active)) { |
---|
| 730 | + virtio_balloon_report_free_page(vb); |
---|
| 731 | + } |
---|
432 | 732 | } |
---|
433 | 733 | |
---|
434 | 734 | #ifdef CONFIG_BALLOON_COMPACTION |
---|
.. | .. |
---|
506 | 806 | return MIGRATEPAGE_SUCCESS; |
---|
507 | 807 | } |
---|
508 | 808 | |
---|
509 | | -static struct dentry *balloon_mount(struct file_system_type *fs_type, |
---|
510 | | - int flags, const char *dev_name, void *data) |
---|
| 809 | +static int balloon_init_fs_context(struct fs_context *fc) |
---|
511 | 810 | { |
---|
512 | | - static const struct dentry_operations ops = { |
---|
513 | | - .d_dname = simple_dname, |
---|
514 | | - }; |
---|
515 | | - |
---|
516 | | - return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops, |
---|
517 | | - BALLOON_KVM_MAGIC); |
---|
| 811 | + return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM; |
---|
518 | 812 | } |
---|
519 | 813 | |
---|
520 | 814 | static struct file_system_type balloon_fs = { |
---|
521 | 815 | .name = "balloon-kvm", |
---|
522 | | - .mount = balloon_mount, |
---|
| 816 | + .init_fs_context = balloon_init_fs_context, |
---|
523 | 817 | .kill_sb = kill_anon_super, |
---|
524 | 818 | }; |
---|
525 | 819 | |
---|
526 | 820 | #endif /* CONFIG_BALLOON_COMPACTION */ |
---|
527 | 821 | |
---|
| 822 | +static unsigned long shrink_free_pages(struct virtio_balloon *vb, |
---|
| 823 | + unsigned long pages_to_free) |
---|
| 824 | +{ |
---|
| 825 | + unsigned long blocks_to_free, blocks_freed; |
---|
| 826 | + |
---|
| 827 | + pages_to_free = round_up(pages_to_free, |
---|
| 828 | + VIRTIO_BALLOON_HINT_BLOCK_PAGES); |
---|
| 829 | + blocks_to_free = pages_to_free / VIRTIO_BALLOON_HINT_BLOCK_PAGES; |
---|
| 830 | + blocks_freed = return_free_pages_to_mm(vb, blocks_to_free); |
---|
| 831 | + |
---|
| 832 | + return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES; |
---|
| 833 | +} |
---|
| 834 | + |
---|
528 | 835 | static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, |
---|
529 | 836 | struct shrink_control *sc) |
---|
530 | 837 | { |
---|
531 | | - unsigned long pages_to_free, pages_freed = 0; |
---|
532 | 838 | struct virtio_balloon *vb = container_of(shrinker, |
---|
533 | 839 | struct virtio_balloon, shrinker); |
---|
534 | 840 | |
---|
535 | | - pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE; |
---|
536 | | - |
---|
537 | | - /* |
---|
538 | | - * One invocation of leak_balloon can deflate at most |
---|
539 | | - * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it |
---|
540 | | - * multiple times to deflate pages till reaching pages_to_free. |
---|
541 | | - */ |
---|
542 | | - while (vb->num_pages && pages_to_free) { |
---|
543 | | - pages_to_free -= pages_freed; |
---|
544 | | - pages_freed += leak_balloon(vb, pages_to_free); |
---|
545 | | - } |
---|
546 | | - update_balloon_size(vb); |
---|
547 | | - |
---|
548 | | - return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE; |
---|
| 841 | + return shrink_free_pages(vb, sc->nr_to_scan); |
---|
549 | 842 | } |
---|
550 | 843 | |
---|
551 | 844 | static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, |
---|
.. | .. |
---|
554 | 847 | struct virtio_balloon *vb = container_of(shrinker, |
---|
555 | 848 | struct virtio_balloon, shrinker); |
---|
556 | 849 | |
---|
557 | | - return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; |
---|
| 850 | + return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; |
---|
| 851 | +} |
---|
| 852 | + |
---|
| 853 | +static int virtio_balloon_oom_notify(struct notifier_block *nb, |
---|
| 854 | + unsigned long dummy, void *parm) |
---|
| 855 | +{ |
---|
| 856 | + struct virtio_balloon *vb = container_of(nb, |
---|
| 857 | + struct virtio_balloon, oom_nb); |
---|
| 858 | + unsigned long *freed = parm; |
---|
| 859 | + |
---|
| 860 | + *freed += leak_balloon(vb, VIRTIO_BALLOON_OOM_NR_PAGES) / |
---|
| 861 | + VIRTIO_BALLOON_PAGES_PER_PAGE; |
---|
| 862 | + update_balloon_size(vb); |
---|
| 863 | + |
---|
| 864 | + return NOTIFY_OK; |
---|
558 | 865 | } |
---|
559 | 866 | |
---|
560 | 867 | static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) |
---|
.. | .. |
---|
612 | 919 | vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); |
---|
613 | 920 | if (IS_ERR(vb->vb_dev_info.inode)) { |
---|
614 | 921 | err = PTR_ERR(vb->vb_dev_info.inode); |
---|
615 | | - kern_unmount(balloon_mnt); |
---|
616 | | - goto out_del_vqs; |
---|
| 922 | + goto out_kern_unmount; |
---|
617 | 923 | } |
---|
618 | 924 | vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; |
---|
619 | 925 | #endif |
---|
620 | | - /* |
---|
621 | | - * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a |
---|
622 | | - * shrinker needs to be registered to relieve memory pressure. |
---|
623 | | - */ |
---|
624 | | - if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { |
---|
| 926 | + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { |
---|
| 927 | + /* |
---|
| 928 | + * There is always one entry reserved for cmd id, so the ring |
---|
| 929 | + * size needs to be at least two to report free page hints. |
---|
| 930 | + */ |
---|
| 931 | + if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { |
---|
| 932 | + err = -ENOSPC; |
---|
| 933 | + goto out_iput; |
---|
| 934 | + } |
---|
| 935 | + vb->balloon_wq = alloc_workqueue("balloon-wq", |
---|
| 936 | + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); |
---|
| 937 | + if (!vb->balloon_wq) { |
---|
| 938 | + err = -ENOMEM; |
---|
| 939 | + goto out_iput; |
---|
| 940 | + } |
---|
| 941 | + INIT_WORK(&vb->report_free_page_work, report_free_page_func); |
---|
| 942 | + vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; |
---|
| 943 | + vb->cmd_id_active = cpu_to_virtio32(vb->vdev, |
---|
| 944 | + VIRTIO_BALLOON_CMD_ID_STOP); |
---|
| 945 | + vb->cmd_id_stop = cpu_to_virtio32(vb->vdev, |
---|
| 946 | + VIRTIO_BALLOON_CMD_ID_STOP); |
---|
| 947 | + spin_lock_init(&vb->free_page_list_lock); |
---|
| 948 | + INIT_LIST_HEAD(&vb->free_page_list); |
---|
| 949 | + /* |
---|
| 950 | + * We're allowed to reuse any free pages, even if they are |
---|
| 951 | + * still to be processed by the host. |
---|
| 952 | + */ |
---|
625 | 953 | err = virtio_balloon_register_shrinker(vb); |
---|
626 | 954 | if (err) |
---|
627 | | - goto out_del_vqs; |
---|
| 955 | + goto out_del_balloon_wq; |
---|
628 | 956 | } |
---|
| 957 | + |
---|
| 958 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { |
---|
| 959 | + vb->oom_nb.notifier_call = virtio_balloon_oom_notify; |
---|
| 960 | + vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY; |
---|
| 961 | + err = register_oom_notifier(&vb->oom_nb); |
---|
| 962 | + if (err < 0) |
---|
| 963 | + goto out_unregister_shrinker; |
---|
| 964 | + } |
---|
| 965 | + |
---|
| 966 | + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { |
---|
| 967 | + /* Start with poison val of 0 representing general init */ |
---|
| 968 | + __u32 poison_val = 0; |
---|
| 969 | + |
---|
| 970 | + /* |
---|
| 971 | + * Let the hypervisor know that we are expecting a |
---|
| 972 | + * specific value to be written back in balloon pages. |
---|
| 973 | + * |
---|
| 974 | + * If the PAGE_POISON value was larger than a byte we would |
---|
| 975 | + * need to byte swap poison_val here to guarantee it is |
---|
| 976 | + * little-endian. However for now it is a single byte so we |
---|
| 977 | + * can pass it as-is. |
---|
| 978 | + */ |
---|
| 979 | + if (!want_init_on_free()) |
---|
| 980 | + memset(&poison_val, PAGE_POISON, sizeof(poison_val)); |
---|
| 981 | + |
---|
| 982 | + virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, |
---|
| 983 | + poison_val, &poison_val); |
---|
| 984 | + } |
---|
| 985 | + |
---|
| 986 | + vb->pr_dev_info.report = virtballoon_free_page_report; |
---|
| 987 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) { |
---|
| 988 | + unsigned int capacity; |
---|
| 989 | + |
---|
| 990 | + capacity = virtqueue_get_vring_size(vb->reporting_vq); |
---|
| 991 | + if (capacity < PAGE_REPORTING_CAPACITY) { |
---|
| 992 | + err = -ENOSPC; |
---|
| 993 | + goto out_unregister_oom; |
---|
| 994 | + } |
---|
| 995 | + |
---|
| 996 | + err = page_reporting_register(&vb->pr_dev_info); |
---|
| 997 | + if (err) |
---|
| 998 | + goto out_unregister_oom; |
---|
| 999 | + } |
---|
| 1000 | + |
---|
629 | 1001 | virtio_device_ready(vdev); |
---|
630 | 1002 | |
---|
631 | 1003 | if (towards_target(vb)) |
---|
632 | 1004 | virtballoon_changed(vdev); |
---|
633 | 1005 | return 0; |
---|
634 | 1006 | |
---|
| 1007 | +out_unregister_oom: |
---|
| 1008 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) |
---|
| 1009 | + unregister_oom_notifier(&vb->oom_nb); |
---|
| 1010 | +out_unregister_shrinker: |
---|
| 1011 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) |
---|
| 1012 | + virtio_balloon_unregister_shrinker(vb); |
---|
| 1013 | +out_del_balloon_wq: |
---|
| 1014 | + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) |
---|
| 1015 | + destroy_workqueue(vb->balloon_wq); |
---|
| 1016 | +out_iput: |
---|
| 1017 | +#ifdef CONFIG_BALLOON_COMPACTION |
---|
| 1018 | + iput(vb->vb_dev_info.inode); |
---|
| 1019 | +out_kern_unmount: |
---|
| 1020 | + kern_unmount(balloon_mnt); |
---|
635 | 1021 | out_del_vqs: |
---|
| 1022 | +#endif |
---|
636 | 1023 | vdev->config->del_vqs(vdev); |
---|
637 | 1024 | out_free_vb: |
---|
638 | 1025 | kfree(vb); |
---|
.. | .. |
---|
647 | 1034 | leak_balloon(vb, vb->num_pages); |
---|
648 | 1035 | update_balloon_size(vb); |
---|
649 | 1036 | |
---|
| 1037 | + /* There might be free pages that are being reported: release them. */ |
---|
| 1038 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) |
---|
| 1039 | + return_free_pages_to_mm(vb, ULONG_MAX); |
---|
| 1040 | + |
---|
650 | 1041 | /* Now we reset the device so we can clean up the queues. */ |
---|
651 | 1042 | vb->vdev->config->reset(vb->vdev); |
---|
652 | 1043 | |
---|
.. | .. |
---|
657 | 1048 | { |
---|
658 | 1049 | struct virtio_balloon *vb = vdev->priv; |
---|
659 | 1050 | |
---|
| 1051 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) |
---|
| 1052 | + page_reporting_unregister(&vb->pr_dev_info); |
---|
660 | 1053 | if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) |
---|
| 1054 | + unregister_oom_notifier(&vb->oom_nb); |
---|
| 1055 | + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) |
---|
661 | 1056 | virtio_balloon_unregister_shrinker(vb); |
---|
662 | 1057 | spin_lock_irq(&vb->stop_update_lock); |
---|
663 | 1058 | vb->stop_update = true; |
---|
664 | 1059 | spin_unlock_irq(&vb->stop_update_lock); |
---|
665 | 1060 | cancel_work_sync(&vb->update_balloon_size_work); |
---|
666 | 1061 | cancel_work_sync(&vb->update_balloon_stats_work); |
---|
| 1062 | + |
---|
| 1063 | + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { |
---|
| 1064 | + cancel_work_sync(&vb->report_free_page_work); |
---|
| 1065 | + destroy_workqueue(vb->balloon_wq); |
---|
| 1066 | + } |
---|
667 | 1067 | |
---|
668 | 1068 | remove_common(vb); |
---|
669 | 1069 | #ifdef CONFIG_BALLOON_COMPACTION |
---|
.. | .. |
---|
708 | 1108 | |
---|
709 | 1109 | static int virtballoon_validate(struct virtio_device *vdev) |
---|
710 | 1110 | { |
---|
711 | | - __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); |
---|
| 1111 | + /* |
---|
| 1112 | + * Inform the hypervisor that our pages are poisoned or |
---|
| 1113 | + * initialized. If we cannot do that then we should disable |
---|
| 1114 | + * page reporting as it could potentially change the contents |
---|
| 1115 | + * of our free pages. |
---|
| 1116 | + */ |
---|
| 1117 | + if (!want_init_on_free() && !page_poisoning_enabled_static()) |
---|
| 1118 | + __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON); |
---|
| 1119 | + else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) |
---|
| 1120 | + __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING); |
---|
| 1121 | + |
---|
| 1122 | + __virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM); |
---|
712 | 1123 | return 0; |
---|
713 | 1124 | } |
---|
714 | 1125 | |
---|
.. | .. |
---|
716 | 1127 | VIRTIO_BALLOON_F_MUST_TELL_HOST, |
---|
717 | 1128 | VIRTIO_BALLOON_F_STATS_VQ, |
---|
718 | 1129 | VIRTIO_BALLOON_F_DEFLATE_ON_OOM, |
---|
| 1130 | + VIRTIO_BALLOON_F_FREE_PAGE_HINT, |
---|
| 1131 | + VIRTIO_BALLOON_F_PAGE_POISON, |
---|
| 1132 | + VIRTIO_BALLOON_F_REPORTING, |
---|
719 | 1133 | }; |
---|
720 | 1134 | |
---|
721 | 1135 | static struct virtio_driver virtio_balloon_driver = { |
---|