hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/misc/vmw_balloon.c
....@@ -17,6 +17,7 @@
1717 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1818
1919 #include <linux/types.h>
20
+#include <linux/io.h>
2021 #include <linux/kernel.h>
2122 #include <linux/mm.h>
2223 #include <linux/vmalloc.h>
....@@ -25,35 +26,35 @@
2526 #include <linux/workqueue.h>
2627 #include <linux/debugfs.h>
2728 #include <linux/seq_file.h>
29
+#include <linux/rwsem.h>
30
+#include <linux/slab.h>
31
+#include <linux/spinlock.h>
32
+#include <linux/mount.h>
33
+#include <linux/pseudo_fs.h>
34
+#include <linux/balloon_compaction.h>
2835 #include <linux/vmw_vmci_defs.h>
2936 #include <linux/vmw_vmci_api.h>
3037 #include <asm/hypervisor.h>
3138
3239 MODULE_AUTHOR("VMware, Inc.");
3340 MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
34
-MODULE_VERSION("1.5.0.0-k");
3541 MODULE_ALIAS("dmi:*:svnVMware*:*");
3642 MODULE_ALIAS("vmware_vmmemctl");
3743 MODULE_LICENSE("GPL");
3844
39
-/*
40
- * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
41
- * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
42
- * __GFP_NOWARN, to suppress page allocation failure warnings.
43
- */
44
-#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN)
45
+static bool __read_mostly vmwballoon_shrinker_enable;
46
+module_param(vmwballoon_shrinker_enable, bool, 0444);
47
+MODULE_PARM_DESC(vmwballoon_shrinker_enable,
48
+ "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");
4549
46
-/*
47
- * Use GFP_HIGHUSER when executing in a separate kernel thread
48
- * context and allocation can sleep. This is less stressful to
49
- * the guest memory system, since it allows the thread to block
50
- * while memory is reclaimed, and won't take pages from emergency
51
- * low-memory pools.
52
- */
53
-#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER)
50
+/* Delay in seconds after shrink before inflation. */
51
+#define VMBALLOON_SHRINK_DELAY (5)
5452
5553 /* Maximum number of refused pages we accumulate during inflation cycle */
5654 #define VMW_BALLOON_MAX_REFUSED 16
55
+
56
+/* Magic number for the balloon mount-point */
57
+#define BALLOON_VMW_MAGIC 0x0ba11007
5758
5859 /*
5960 * Hypervisor communication port definitions.
....@@ -70,232 +71,468 @@
7071 VMW_BALLOON_BATCHED_CMDS = (1 << 2),
7172 VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3),
7273 VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4),
74
+ VMW_BALLOON_64_BIT_TARGET = (1 << 5)
7375 };
7476
75
-#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \
77
+#define VMW_BALLOON_CAPABILITIES_COMMON (VMW_BALLOON_BASIC_CMDS \
7678 | VMW_BALLOON_BATCHED_CMDS \
7779 | VMW_BALLOON_BATCHED_2M_CMDS \
7880 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
7981
80
-#define VMW_BALLOON_2M_SHIFT (9)
81
-#define VMW_BALLOON_NUM_PAGE_SIZES (2)
82
+#define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT)
8283
8384 /*
84
- * Backdoor commands availability:
85
- *
86
- * START, GET_TARGET and GUEST_ID are always available,
87
- *
88
- * VMW_BALLOON_BASIC_CMDS:
89
- * LOCK and UNLOCK commands,
90
- * VMW_BALLOON_BATCHED_CMDS:
91
- * BATCHED_LOCK and BATCHED_UNLOCK commands.
92
- * VMW BALLOON_BATCHED_2M_CMDS:
93
- * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
94
- * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
95
- * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
85
+ * 64-bit targets are only supported in 64-bit
9686 */
97
-#define VMW_BALLOON_CMD_START 0
98
-#define VMW_BALLOON_CMD_GET_TARGET 1
99
-#define VMW_BALLOON_CMD_LOCK 2
100
-#define VMW_BALLOON_CMD_UNLOCK 3
101
-#define VMW_BALLOON_CMD_GUEST_ID 4
102
-#define VMW_BALLOON_CMD_BATCHED_LOCK 6
103
-#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7
104
-#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8
105
-#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9
106
-#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10
87
+#ifdef CONFIG_64BIT
88
+#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_CAPABILITIES_COMMON \
89
+ | VMW_BALLOON_64_BIT_TARGET)
90
+#else
91
+#define VMW_BALLOON_CAPABILITIES VMW_BALLOON_CAPABILITIES_COMMON
92
+#endif
10793
94
+enum vmballoon_page_size_type {
95
+ VMW_BALLOON_4K_PAGE,
96
+ VMW_BALLOON_2M_PAGE,
97
+ VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
98
+};
10899
109
-/* error codes */
110
-#define VMW_BALLOON_SUCCESS 0
111
-#define VMW_BALLOON_FAILURE -1
112
-#define VMW_BALLOON_ERROR_CMD_INVALID 1
113
-#define VMW_BALLOON_ERROR_PPN_INVALID 2
114
-#define VMW_BALLOON_ERROR_PPN_LOCKED 3
115
-#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4
116
-#define VMW_BALLOON_ERROR_PPN_PINNED 5
117
-#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6
118
-#define VMW_BALLOON_ERROR_RESET 7
119
-#define VMW_BALLOON_ERROR_BUSY 8
100
+#define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1)
101
+
102
+static const char * const vmballoon_page_size_names[] = {
103
+ [VMW_BALLOON_4K_PAGE] = "4k",
104
+ [VMW_BALLOON_2M_PAGE] = "2M"
105
+};
106
+
107
+enum vmballoon_op {
108
+ VMW_BALLOON_INFLATE,
109
+ VMW_BALLOON_DEFLATE
110
+};
111
+
112
+enum vmballoon_op_stat_type {
113
+ VMW_BALLOON_OP_STAT,
114
+ VMW_BALLOON_OP_FAIL_STAT
115
+};
116
+
117
+#define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1)
118
+
119
+/**
120
+ * enum vmballoon_cmd_type - backdoor commands.
121
+ *
122
+ * Availability of the commands is as followed:
123
+ *
124
+ * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
125
+ * %VMW_BALLOON_CMD_GUEST_ID are always available.
126
+ *
127
+ * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then
128
+ * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
129
+ *
130
+ * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then
131
+ * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands
132
+ * are available.
133
+ *
134
+ * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then
135
+ * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
136
+ * are supported.
137
+ *
138
+ * If the host reports VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then
139
+ * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported.
140
+ *
141
+ * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
142
+ * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
143
+ * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
144
+ * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
145
+ * to be deflated from the balloon.
146
+ * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
147
+ * runs in the VM.
148
+ * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
149
+ * ballooned pages (up to 512).
150
+ * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
151
+ * pages that are about to be deflated from the
152
+ * balloon (up to 512).
153
+ * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
154
+ * for 2MB pages.
155
+ * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
156
+ * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
157
+ * pages.
158
+ * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
159
+ * that would be invoked when the balloon
160
+ * size changes.
161
+ * @VMW_BALLOON_CMD_LAST: Value of the last command.
162
+ */
163
+enum vmballoon_cmd_type {
164
+ VMW_BALLOON_CMD_START,
165
+ VMW_BALLOON_CMD_GET_TARGET,
166
+ VMW_BALLOON_CMD_LOCK,
167
+ VMW_BALLOON_CMD_UNLOCK,
168
+ VMW_BALLOON_CMD_GUEST_ID,
169
+ /* No command 5 */
170
+ VMW_BALLOON_CMD_BATCHED_LOCK = 6,
171
+ VMW_BALLOON_CMD_BATCHED_UNLOCK,
172
+ VMW_BALLOON_CMD_BATCHED_2M_LOCK,
173
+ VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
174
+ VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
175
+ VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
176
+};
177
+
178
+#define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1)
179
+
180
+enum vmballoon_error_codes {
181
+ VMW_BALLOON_SUCCESS,
182
+ VMW_BALLOON_ERROR_CMD_INVALID,
183
+ VMW_BALLOON_ERROR_PPN_INVALID,
184
+ VMW_BALLOON_ERROR_PPN_LOCKED,
185
+ VMW_BALLOON_ERROR_PPN_UNLOCKED,
186
+ VMW_BALLOON_ERROR_PPN_PINNED,
187
+ VMW_BALLOON_ERROR_PPN_NOTNEEDED,
188
+ VMW_BALLOON_ERROR_RESET,
189
+ VMW_BALLOON_ERROR_BUSY
190
+};
120191
121192 #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000)
122193
123
-/* Batch page description */
194
+#define VMW_BALLOON_CMD_WITH_TARGET_MASK \
195
+ ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \
196
+ (1UL << VMW_BALLOON_CMD_LOCK) | \
197
+ (1UL << VMW_BALLOON_CMD_UNLOCK) | \
198
+ (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \
199
+ (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \
200
+ (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \
201
+ (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
124202
125
-/*
126
- * Layout of a page in the batch page:
127
- *
128
- * +-------------+----------+--------+
129
- * | | | |
130
- * | Page number | Reserved | Status |
131
- * | | | |
132
- * +-------------+----------+--------+
133
- * 64 PAGE_SHIFT 6 0
134
- *
135
- * The reserved field should be set to 0.
136
- */
137
-#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64))
138
-#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1)
139
-#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1))
140
-
141
-struct vmballoon_batch_page {
142
- u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
203
+static const char * const vmballoon_cmd_names[] = {
204
+ [VMW_BALLOON_CMD_START] = "start",
205
+ [VMW_BALLOON_CMD_GET_TARGET] = "target",
206
+ [VMW_BALLOON_CMD_LOCK] = "lock",
207
+ [VMW_BALLOON_CMD_UNLOCK] = "unlock",
208
+ [VMW_BALLOON_CMD_GUEST_ID] = "guestType",
209
+ [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock",
210
+ [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock",
211
+ [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock",
212
+ [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock",
213
+ [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet"
143214 };
144215
145
-static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
146
-{
147
- return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
148
-}
149
-
150
-static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
151
- int idx)
152
-{
153
- return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
154
-}
155
-
156
-static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
157
- u64 pa)
158
-{
159
- batch->pages[idx] = pa;
160
-}
161
-
162
-
163
-#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \
164
-({ \
165
- unsigned long __status, __dummy1, __dummy2, __dummy3; \
166
- __asm__ __volatile__ ("inl %%dx" : \
167
- "=a"(__status), \
168
- "=c"(__dummy1), \
169
- "=d"(__dummy2), \
170
- "=b"(result), \
171
- "=S" (__dummy3) : \
172
- "0"(VMW_BALLOON_HV_MAGIC), \
173
- "1"(VMW_BALLOON_CMD_##cmd), \
174
- "2"(VMW_BALLOON_HV_PORT), \
175
- "3"(arg1), \
176
- "4" (arg2) : \
177
- "memory"); \
178
- if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \
179
- result = __dummy1; \
180
- result &= -1UL; \
181
- __status & -1UL; \
182
-})
183
-
184
-#ifdef CONFIG_DEBUG_FS
185
-struct vmballoon_stats {
186
- unsigned int timer;
187
- unsigned int doorbell;
188
-
189
- /* allocation statistics */
190
- unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
191
- unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
192
- unsigned int sleep_alloc;
193
- unsigned int sleep_alloc_fail;
194
- unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
195
- unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
196
- unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
197
-
198
- /* monitor operations */
199
- unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
200
- unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
201
- unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
202
- unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
203
- unsigned int target;
204
- unsigned int target_fail;
205
- unsigned int start;
206
- unsigned int start_fail;
207
- unsigned int guest_type;
208
- unsigned int guest_type_fail;
209
- unsigned int doorbell_set;
210
- unsigned int doorbell_unset;
216
+enum vmballoon_stat_page {
217
+ VMW_BALLOON_PAGE_STAT_ALLOC,
218
+ VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
219
+ VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
220
+ VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
221
+ VMW_BALLOON_PAGE_STAT_FREE,
222
+ VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
211223 };
212224
213
-#define STATS_INC(stat) (stat)++
214
-#else
215
-#define STATS_INC(stat)
216
-#endif
225
+#define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1)
217226
218
-struct vmballoon;
219
-
220
-struct vmballoon_ops {
221
- void (*add_page)(struct vmballoon *b, int idx, struct page *p);
222
- int (*lock)(struct vmballoon *b, unsigned int num_pages,
223
- bool is_2m_pages, unsigned int *target);
224
- int (*unlock)(struct vmballoon *b, unsigned int num_pages,
225
- bool is_2m_pages, unsigned int *target);
227
+enum vmballoon_stat_general {
228
+ VMW_BALLOON_STAT_TIMER,
229
+ VMW_BALLOON_STAT_DOORBELL,
230
+ VMW_BALLOON_STAT_RESET,
231
+ VMW_BALLOON_STAT_SHRINK,
232
+ VMW_BALLOON_STAT_SHRINK_FREE,
233
+ VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE
226234 };
227235
228
-struct vmballoon_page_size {
229
- /* list of reserved physical pages */
236
+#define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1)
237
+
238
+static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
239
+static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);
240
+
241
+struct vmballoon_ctl {
230242 struct list_head pages;
231
-
232
- /* transient list of non-balloonable pages */
233243 struct list_head refused_pages;
244
+ struct list_head prealloc_pages;
234245 unsigned int n_refused_pages;
246
+ unsigned int n_pages;
247
+ enum vmballoon_page_size_type page_size;
248
+ enum vmballoon_op op;
235249 };
250
+
251
+/**
252
+ * struct vmballoon_batch_entry - a batch entry for lock or unlock.
253
+ *
254
+ * @status: the status of the operation, which is written by the hypervisor.
255
+ * @reserved: reserved for future use. Must be set to zero.
256
+ * @pfn: the physical frame number of the page to be locked or unlocked.
257
+ */
258
+struct vmballoon_batch_entry {
259
+ u64 status : 5;
260
+ u64 reserved : PAGE_SHIFT - 5;
261
+ u64 pfn : 52;
262
+} __packed;
236263
237264 struct vmballoon {
238
- struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
265
+ /**
266
+ * @max_page_size: maximum supported page size for ballooning.
267
+ *
268
+ * Protected by @conf_sem
269
+ */
270
+ enum vmballoon_page_size_type max_page_size;
239271
240
- /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
241
- unsigned supported_page_sizes;
272
+ /**
273
+ * @size: balloon actual size in basic page size (frames).
274
+ *
275
+ * While we currently do not support size which is bigger than 32-bit,
276
+ * in preparation for future support, use 64-bits.
277
+ */
278
+ atomic64_t size;
242279
243
- /* balloon size in pages */
244
- unsigned int size;
245
- unsigned int target;
280
+ /**
281
+ * @target: balloon target size in basic page size (frames).
282
+ *
283
+ * We do not protect the target under the assumption that setting the
284
+ * value is always done through a single write. If this assumption ever
285
+ * breaks, we would have to use X_ONCE for accesses, and suffer the less
286
+ * optimized code. Although we may read stale target value if multiple
287
+ * accesses happen at once, the performance impact should be minor.
288
+ */
289
+ unsigned long target;
246290
247
- /* reset flag */
291
+ /**
292
+ * @reset_required: reset flag
293
+ *
294
+ * Setting this flag may introduce races, but the code is expected to
295
+ * handle them gracefully. In the worst case, another operation will
296
+ * fail as reset did not take place. Clearing the flag is done while
297
+ * holding @conf_sem for write.
298
+ */
248299 bool reset_required;
249300
301
+ /**
302
+ * @capabilities: hypervisor balloon capabilities.
303
+ *
304
+ * Protected by @conf_sem.
305
+ */
250306 unsigned long capabilities;
251307
252
- struct vmballoon_batch_page *batch_page;
308
+ /**
309
+ * @batch_page: pointer to communication batch page.
310
+ *
311
+ * When batching is used, batch_page points to a page, which holds up to
312
+ * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
313
+ */
314
+ struct vmballoon_batch_entry *batch_page;
315
+
316
+ /**
317
+ * @batch_max_pages: maximum pages that can be locked/unlocked.
318
+ *
319
+ * Indicates the number of pages that the hypervisor can lock or unlock
320
+ * at once, according to whether batching is enabled. If batching is
321
+ * disabled, only a single page can be locked/unlock on each operation.
322
+ *
323
+ * Protected by @conf_sem.
324
+ */
253325 unsigned int batch_max_pages;
326
+
327
+ /**
328
+ * @page: page to be locked/unlocked by the hypervisor
329
+ *
330
+ * @page is only used when batching is disabled and a single page is
331
+ * reclaimed on each iteration.
332
+ *
333
+ * Protected by @comm_lock.
334
+ */
254335 struct page *page;
255336
256
- const struct vmballoon_ops *ops;
337
+ /**
338
+ * @shrink_timeout: timeout until the next inflation.
339
+ *
340
+ * After an shrink event, indicates the time in jiffies after which
341
+ * inflation is allowed again. Can be written concurrently with reads,
342
+ * so must use READ_ONCE/WRITE_ONCE when accessing.
343
+ */
344
+ unsigned long shrink_timeout;
345
+
346
+ /* statistics */
347
+ struct vmballoon_stats *stats;
257348
258349 #ifdef CONFIG_DEBUG_FS
259
- /* statistics */
260
- struct vmballoon_stats stats;
261
-
262350 /* debugfs file exporting statistics */
263351 struct dentry *dbg_entry;
264352 #endif
265353
266
- struct sysinfo sysinfo;
354
+ /**
355
+ * @b_dev_info: balloon device information descriptor.
356
+ */
357
+ struct balloon_dev_info b_dev_info;
267358
268359 struct delayed_work dwork;
269360
361
+ /**
362
+ * @huge_pages - list of the inflated 2MB pages.
363
+ *
364
+ * Protected by @b_dev_info.pages_lock .
365
+ */
366
+ struct list_head huge_pages;
367
+
368
+ /**
369
+ * @vmci_doorbell.
370
+ *
371
+ * Protected by @conf_sem.
372
+ */
270373 struct vmci_handle vmci_doorbell;
374
+
375
+ /**
376
+ * @conf_sem: semaphore to protect the configuration and the statistics.
377
+ */
378
+ struct rw_semaphore conf_sem;
379
+
380
+ /**
381
+ * @comm_lock: lock to protect the communication with the host.
382
+ *
383
+ * Lock ordering: @conf_sem -> @comm_lock .
384
+ */
385
+ spinlock_t comm_lock;
386
+
387
+ /**
388
+ * @shrinker: shrinker interface that is used to avoid over-inflation.
389
+ */
390
+ struct shrinker shrinker;
391
+
392
+ /**
393
+ * @shrinker_registered: whether the shrinker was registered.
394
+ *
395
+ * The shrinker interface does not handle gracefully the removal of
396
+ * shrinker that was not registered before. This indication allows to
397
+ * simplify the unregistration process.
398
+ */
399
+ bool shrinker_registered;
271400 };
272401
273402 static struct vmballoon balloon;
403
+
404
+struct vmballoon_stats {
405
+ /* timer / doorbell operations */
406
+ atomic64_t general_stat[VMW_BALLOON_STAT_NUM];
407
+
408
+ /* allocation statistics for huge and small pages */
409
+ atomic64_t
410
+ page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];
411
+
412
+ /* Monitor operations: total operations, and failures */
413
+ atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
414
+};
415
+
416
+static inline bool is_vmballoon_stats_on(void)
417
+{
418
+ return IS_ENABLED(CONFIG_DEBUG_FS) &&
419
+ static_branch_unlikely(&balloon_stat_enabled);
420
+}
421
+
422
+static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
423
+ enum vmballoon_op_stat_type type)
424
+{
425
+ if (is_vmballoon_stats_on())
426
+ atomic64_inc(&b->stats->ops[op][type]);
427
+}
428
+
429
+static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
430
+ enum vmballoon_stat_general stat)
431
+{
432
+ if (is_vmballoon_stats_on())
433
+ atomic64_inc(&b->stats->general_stat[stat]);
434
+}
435
+
436
+static inline void vmballoon_stats_gen_add(struct vmballoon *b,
437
+ enum vmballoon_stat_general stat,
438
+ unsigned int val)
439
+{
440
+ if (is_vmballoon_stats_on())
441
+ atomic64_add(val, &b->stats->general_stat[stat]);
442
+}
443
+
444
+static inline void vmballoon_stats_page_inc(struct vmballoon *b,
445
+ enum vmballoon_stat_page stat,
446
+ enum vmballoon_page_size_type size)
447
+{
448
+ if (is_vmballoon_stats_on())
449
+ atomic64_inc(&b->stats->page_stat[stat][size]);
450
+}
451
+
452
+static inline void vmballoon_stats_page_add(struct vmballoon *b,
453
+ enum vmballoon_stat_page stat,
454
+ enum vmballoon_page_size_type size,
455
+ unsigned int val)
456
+{
457
+ if (is_vmballoon_stats_on())
458
+ atomic64_add(val, &b->stats->page_stat[stat][size]);
459
+}
460
+
461
+static inline unsigned long
462
+__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
463
+ unsigned long arg2, unsigned long *result)
464
+{
465
+ unsigned long status, dummy1, dummy2, dummy3, local_result;
466
+
467
+ vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);
468
+
469
+ asm volatile ("inl %%dx" :
470
+ "=a"(status),
471
+ "=c"(dummy1),
472
+ "=d"(dummy2),
473
+ "=b"(local_result),
474
+ "=S"(dummy3) :
475
+ "0"(VMW_BALLOON_HV_MAGIC),
476
+ "1"(cmd),
477
+ "2"(VMW_BALLOON_HV_PORT),
478
+ "3"(arg1),
479
+ "4"(arg2) :
480
+ "memory");
481
+
482
+ /* update the result if needed */
483
+ if (result)
484
+ *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
485
+ local_result;
486
+
487
+ /* update target when applicable */
488
+ if (status == VMW_BALLOON_SUCCESS &&
489
+ ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
490
+ WRITE_ONCE(b->target, local_result);
491
+
492
+ if (status != VMW_BALLOON_SUCCESS &&
493
+ status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
494
+ vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
495
+ pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
496
+ __func__, vmballoon_cmd_names[cmd], arg1, arg2,
497
+ status);
498
+ }
499
+
500
+ /* mark reset required accordingly */
501
+ if (status == VMW_BALLOON_ERROR_RESET)
502
+ b->reset_required = true;
503
+
504
+ return status;
505
+}
506
+
507
+static __always_inline unsigned long
508
+vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
509
+ unsigned long arg2)
510
+{
511
+ unsigned long dummy;
512
+
513
+ return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
514
+}
274515
275516 /*
276517 * Send "start" command to the host, communicating supported version
277518 * of the protocol.
278519 */
279
-static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
520
+static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
280521 {
281
- unsigned long status, capabilities, dummy = 0;
282
- bool success;
522
+ unsigned long status, capabilities;
283523
284
- STATS_INC(b->stats.start);
285
-
286
- status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
524
+ status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
525
+ &capabilities);
287526
288527 switch (status) {
289528 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
290529 b->capabilities = capabilities;
291
- success = true;
292530 break;
293531 case VMW_BALLOON_SUCCESS:
294532 b->capabilities = VMW_BALLOON_BASIC_CMDS;
295
- success = true;
296533 break;
297534 default:
298
- success = false;
535
+ return -EIO;
299536 }
300537
301538 /*
....@@ -303,626 +540,802 @@
303540 * reason disabled, do not use 2MB pages, since otherwise the legacy
304541 * mechanism is used with 2MB pages, causing a failure.
305542 */
543
+ b->max_page_size = VMW_BALLOON_4K_PAGE;
306544 if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
307545 (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
308
- b->supported_page_sizes = 2;
309
- else
310
- b->supported_page_sizes = 1;
546
+ b->max_page_size = VMW_BALLOON_2M_PAGE;
311547
312
- if (!success) {
313
- pr_debug("%s - failed, hv returns %ld\n", __func__, status);
314
- STATS_INC(b->stats.start_fail);
315
- }
316
- return success;
548
+
549
+ return 0;
317550 }
318551
319
-static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
320
-{
321
- switch (status) {
322
- case VMW_BALLOON_SUCCESS:
323
- return true;
324
-
325
- case VMW_BALLOON_ERROR_RESET:
326
- b->reset_required = true;
327
- /* fall through */
328
-
329
- default:
330
- return false;
331
- }
332
-}
333
-
334
-/*
552
+/**
553
+ * vmballoon_send_guest_id - communicate guest type to the host.
554
+ *
555
+ * @b: pointer to the balloon.
556
+ *
335557 * Communicate guest type to the host so that it can adjust ballooning
336558 * algorithm to the one most appropriate for the guest. This command
337559 * is normally issued after sending "start" command and is part of
338560 * standard reset sequence.
561
+ *
562
+ * Return: zero on success or appropriate error code.
339563 */
340
-static bool vmballoon_send_guest_id(struct vmballoon *b)
341
-{
342
- unsigned long status, dummy = 0;
343
-
344
- status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
345
- dummy);
346
-
347
- STATS_INC(b->stats.guest_type);
348
-
349
- if (vmballoon_check_status(b, status))
350
- return true;
351
-
352
- pr_debug("%s - failed, hv returns %ld\n", __func__, status);
353
- STATS_INC(b->stats.guest_type_fail);
354
- return false;
355
-}
356
-
357
-static u16 vmballoon_page_size(bool is_2m_page)
358
-{
359
- if (is_2m_page)
360
- return 1 << VMW_BALLOON_2M_SHIFT;
361
-
362
- return 1;
363
-}
364
-
365
-/*
366
- * Retrieve desired balloon size from the host.
367
- */
368
-static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
564
+static int vmballoon_send_guest_id(struct vmballoon *b)
369565 {
370566 unsigned long status;
371
- unsigned long target;
372
- unsigned long limit;
373
- unsigned long dummy = 0;
374
- u32 limit32;
375567
376
- /*
377
- * si_meminfo() is cheap. Moreover, we want to provide dynamic
378
- * max balloon size later. So let us call si_meminfo() every
379
- * iteration.
380
- */
381
- si_meminfo(&b->sysinfo);
382
- limit = b->sysinfo.totalram;
568
+ status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
569
+ VMW_BALLOON_GUEST_ID, 0);
383570
384
- /* Ensure limit fits in 32-bits */
385
- limit32 = (u32)limit;
386
- if (limit != limit32)
387
- return false;
388
-
389
- /* update stats */
390
- STATS_INC(b->stats.target);
391
-
392
- status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
393
- if (vmballoon_check_status(b, status)) {
394
- *new_target = target;
395
- return true;
396
- }
397
-
398
- pr_debug("%s - failed, hv returns %ld\n", __func__, status);
399
- STATS_INC(b->stats.target_fail);
400
- return false;
571
+ return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
401572 }
402573
403
-/*
404
- * Notify the host about allocated page so that host can use it without
405
- * fear that guest will need it. Host may reject some pages, we need to
406
- * check the return value and maybe submit a different page.
574
+/**
575
+ * vmballoon_page_order() - return the order of the page
576
+ * @page_size: the size of the page.
577
+ *
578
+ * Return: the allocation order.
407579 */
408
-static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
409
- unsigned int *hv_status, unsigned int *target)
580
+static inline
581
+unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
410582 {
411
- unsigned long status, dummy = 0;
412
- u32 pfn32;
583
+ return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
584
+}
413585
414
- pfn32 = (u32)pfn;
415
- if (pfn32 != pfn)
586
+/**
587
+ * vmballoon_page_in_frames() - returns the number of frames in a page.
588
+ * @page_size: the size of the page.
589
+ *
590
+ * Return: the number of 4k frames.
591
+ */
592
+static inline unsigned int
593
+vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
594
+{
595
+ return 1 << vmballoon_page_order(page_size);
596
+}
597
+
598
+/**
599
+ * vmballoon_mark_page_offline() - mark a page as offline
600
+ * @page: pointer for the page.
601
+ * @page_size: the size of the page.
602
+ */
603
+static void
604
+vmballoon_mark_page_offline(struct page *page,
605
+ enum vmballoon_page_size_type page_size)
606
+{
607
+ int i;
608
+
609
+ for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
610
+ __SetPageOffline(page + i);
611
+}
612
+
613
+/**
614
+ * vmballoon_mark_page_online() - mark a page as online
615
+ * @page: pointer for the page.
616
+ * @page_size: the size of the page.
617
+ */
618
+static void
619
+vmballoon_mark_page_online(struct page *page,
620
+ enum vmballoon_page_size_type page_size)
621
+{
622
+ int i;
623
+
624
+ for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
625
+ __ClearPageOffline(page + i);
626
+}
627
+
628
+/**
629
+ * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
630
+ *
631
+ * @b: pointer to the balloon.
632
+ *
633
+ * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required
634
+ * by the host-guest protocol and EIO if an error occurred in communicating with
635
+ * the host.
636
+ */
637
+static int vmballoon_send_get_target(struct vmballoon *b)
638
+{
639
+ unsigned long status;
640
+ unsigned long limit;
641
+
642
+ limit = totalram_pages();
643
+
644
+ /* Ensure limit fits in 32-bits if 64-bit targets are not supported */
645
+ if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) &&
646
+ limit != (u32)limit)
416647 return -EINVAL;
417648
418
- STATS_INC(b->stats.lock[false]);
649
+ status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
419650
420
- *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
421
- if (vmballoon_check_status(b, status))
651
+ return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
652
+}
653
+
654
+/**
655
+ * vmballoon_alloc_page_list - allocates a list of pages.
656
+ *
657
+ * @b: pointer to the balloon.
658
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
659
+ * @req_n_pages: the number of requested pages.
660
+ *
661
+ * Tries to allocate @req_n_pages. Add them to the list of balloon pages in
662
+ * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
663
+ *
664
+ * Return: zero on success or error code otherwise.
665
+ */
666
+static int vmballoon_alloc_page_list(struct vmballoon *b,
667
+ struct vmballoon_ctl *ctl,
668
+ unsigned int req_n_pages)
669
+{
670
+ struct page *page;
671
+ unsigned int i;
672
+
673
+ for (i = 0; i < req_n_pages; i++) {
674
+ /*
675
+ * First check if we happen to have pages that were allocated
676
+ * before. This happens when 2MB page rejected during inflation
677
+ * by the hypervisor, and then split into 4KB pages.
678
+ */
679
+ if (!list_empty(&ctl->prealloc_pages)) {
680
+ page = list_first_entry(&ctl->prealloc_pages,
681
+ struct page, lru);
682
+ list_del(&page->lru);
683
+ } else {
684
+ if (ctl->page_size == VMW_BALLOON_2M_PAGE)
685
+ page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
686
+ __GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
687
+ else
688
+ page = balloon_page_alloc();
689
+
690
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
691
+ ctl->page_size);
692
+ }
693
+
694
+ if (page) {
695
+ /* Success. Add the page to the list and continue. */
696
+ list_add(&page->lru, &ctl->pages);
697
+ continue;
698
+ }
699
+
700
+ /* Allocation failed. Update statistics and stop. */
701
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
702
+ ctl->page_size);
703
+ break;
704
+ }
705
+
706
+ ctl->n_pages = i;
707
+
708
+ return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
709
+}
710
+
711
+/**
712
+ * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
713
+ *
714
+ * @b: pointer for %struct vmballoon.
715
+ * @page: pointer for the page whose result should be handled.
716
+ * @page_size: size of the page.
717
+ * @status: status of the operation as provided by the hypervisor.
718
+ */
719
+static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
720
+ enum vmballoon_page_size_type page_size,
721
+ unsigned long status)
722
+{
723
+ /* On success do nothing. The page is already on the balloon list. */
724
+ if (likely(status == VMW_BALLOON_SUCCESS))
422725 return 0;
423726
424
- pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
425
- STATS_INC(b->stats.lock_fail[false]);
727
+ pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
728
+ page_to_pfn(page), status,
729
+ vmballoon_page_size_names[page_size]);
730
+
731
+ /* Error occurred */
732
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
733
+ page_size);
734
+
426735 return -EIO;
427736 }
428737
429
-static int vmballoon_send_batched_lock(struct vmballoon *b,
430
- unsigned int num_pages, bool is_2m_pages, unsigned int *target)
738
+/**
739
+ * vmballoon_status_page - returns the status of (un)lock operation
740
+ *
741
+ * @b: pointer to the balloon.
742
+ * @idx: index for the page for which the operation is performed.
743
+ * @p: pointer to where the page struct is returned.
744
+ *
745
+ * Following a lock or unlock operation, returns the status of the operation for
746
+ * an individual page. Provides the page that the operation was performed on on
747
+ * the @page argument.
748
+ *
749
+ * Returns: The status of a lock or unlock operation for an individual page.
750
+ */
751
+static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
752
+ struct page **p)
431753 {
432
- unsigned long status;
433
- unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
754
+ if (static_branch_likely(&vmw_balloon_batching)) {
755
+ /* batching mode */
756
+ *p = pfn_to_page(b->batch_page[idx].pfn);
757
+ return b->batch_page[idx].status;
758
+ }
434759
435
- STATS_INC(b->stats.lock[is_2m_pages]);
760
+ /* non-batching mode */
761
+ *p = b->page;
436762
437
- if (is_2m_pages)
438
- status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
439
- *target);
763
+ /*
764
+ * If a failure occurs, the indication will be provided in the status
765
+ * of the entire operation, which is considered before the individual
766
+ * page status. So for non-batching mode, the indication is always of
767
+ * success.
768
+ */
769
+ return VMW_BALLOON_SUCCESS;
770
+}
771
+
772
+/**
773
+ * vmballoon_lock_op - notifies the host about inflated/deflated pages.
774
+ * @b: pointer to the balloon.
775
+ * @num_pages: number of inflated/deflated pages.
776
+ * @page_size: size of the page.
777
+ * @op: the type of operation (lock or unlock).
778
+ *
779
+ * Notify the host about page(s) that were ballooned (or removed from the
780
+ * balloon) so that host can use it without fear that guest will need it (or
781
+ * stop using them since the VM does). Host may reject some pages, we need to
782
+ * check the return value and maybe submit a different page. The pages that are
783
+ * inflated/deflated are pointed by @b->page.
784
+ *
785
+ * Return: result as provided by the hypervisor.
786
+ */
787
+static unsigned long vmballoon_lock_op(struct vmballoon *b,
788
+ unsigned int num_pages,
789
+ enum vmballoon_page_size_type page_size,
790
+ enum vmballoon_op op)
791
+{
792
+ unsigned long cmd, pfn;
793
+
794
+ lockdep_assert_held(&b->comm_lock);
795
+
796
+ if (static_branch_likely(&vmw_balloon_batching)) {
797
+ if (op == VMW_BALLOON_INFLATE)
798
+ cmd = page_size == VMW_BALLOON_2M_PAGE ?
799
+ VMW_BALLOON_CMD_BATCHED_2M_LOCK :
800
+ VMW_BALLOON_CMD_BATCHED_LOCK;
801
+ else
802
+ cmd = page_size == VMW_BALLOON_2M_PAGE ?
803
+ VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
804
+ VMW_BALLOON_CMD_BATCHED_UNLOCK;
805
+
806
+ pfn = PHYS_PFN(virt_to_phys(b->batch_page));
807
+ } else {
808
+ cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
809
+ VMW_BALLOON_CMD_UNLOCK;
810
+ pfn = page_to_pfn(b->page);
811
+
812
+ /* In non-batching mode, PFNs must fit in 32-bit */
813
+ if (unlikely(pfn != (u32)pfn))
814
+ return VMW_BALLOON_ERROR_PPN_INVALID;
815
+ }
816
+
817
+ return vmballoon_cmd(b, cmd, pfn, num_pages);
818
+}
819
+
820
+/**
821
+ * vmballoon_add_page - adds a page towards lock/unlock operation.
822
+ *
823
+ * @b: pointer to the balloon.
824
+ * @idx: index of the page to be ballooned in this batch.
825
+ * @p: pointer to the page that is about to be ballooned.
826
+ *
827
+ * Adds the page to be ballooned. Must be called while holding @comm_lock.
828
+ */
829
+static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
830
+ struct page *p)
831
+{
832
+ lockdep_assert_held(&b->comm_lock);
833
+
834
+ if (static_branch_likely(&vmw_balloon_batching))
835
+ b->batch_page[idx] = (struct vmballoon_batch_entry)
836
+ { .pfn = page_to_pfn(p) };
440837 else
441
- status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
442
- *target);
838
+ b->page = p;
839
+}
443840
444
- if (vmballoon_check_status(b, status))
841
+/**
842
+ * vmballoon_lock - lock or unlock a batch of pages.
843
+ *
844
+ * @b: pointer to the balloon.
845
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
846
+ *
847
+ * Notifies the host of about ballooned pages (after inflation or deflation,
848
+ * according to @ctl). If the host rejects the page put it on the
849
+ * @ctl refuse list. These refused page are then released when moving to the
850
+ * next size of pages.
851
+ *
852
+ * Note that we neither free any @page here nor put them back on the ballooned
853
+ * pages list. Instead we queue it for later processing. We do that for several
854
+ * reasons. First, we do not want to free the page under the lock. Second, it
855
+ * allows us to unify the handling of lock and unlock. In the inflate case, the
856
+ * caller will check if there are too many refused pages and release them.
857
+ * Although it is not identical to the past behavior, it should not affect
858
+ * performance.
859
+ */
860
+static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
861
+{
862
+ unsigned long batch_status;
863
+ struct page *page;
864
+ unsigned int i, num_pages;
865
+
866
+ num_pages = ctl->n_pages;
867
+ if (num_pages == 0)
445868 return 0;
446869
447
- pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
448
- STATS_INC(b->stats.lock_fail[is_2m_pages]);
449
- return 1;
450
-}
870
+ /* communication with the host is done under the communication lock */
871
+ spin_lock(&b->comm_lock);
451872
452
-/*
453
- * Notify the host that guest intends to release given page back into
454
- * the pool of available (to the guest) pages.
455
- */
456
-static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
457
- unsigned int *target)
458
-{
459
- unsigned long status, dummy = 0;
460
- u32 pfn32;
873
+ i = 0;
874
+ list_for_each_entry(page, &ctl->pages, lru)
875
+ vmballoon_add_page(b, i++, page);
461876
462
- pfn32 = (u32)pfn;
463
- if (pfn32 != pfn)
464
- return false;
877
+ batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
878
+ ctl->op);
465879
466
- STATS_INC(b->stats.unlock[false]);
880
+ /*
881
+ * Iterate over the pages in the provided list. Since we are changing
882
+ * @ctl->n_pages we are saving the original value in @num_pages and
883
+ * use this value to bound the loop.
884
+ */
885
+ for (i = 0; i < num_pages; i++) {
886
+ unsigned long status;
467887
468
- status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
469
- if (vmballoon_check_status(b, status))
470
- return true;
471
-
472
- pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
473
- STATS_INC(b->stats.unlock_fail[false]);
474
- return false;
475
-}
476
-
477
-static bool vmballoon_send_batched_unlock(struct vmballoon *b,
478
- unsigned int num_pages, bool is_2m_pages, unsigned int *target)
479
-{
480
- unsigned long status;
481
- unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
482
-
483
- STATS_INC(b->stats.unlock[is_2m_pages]);
484
-
485
- if (is_2m_pages)
486
- status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
487
- *target);
488
- else
489
- status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
490
- *target);
491
-
492
- if (vmballoon_check_status(b, status))
493
- return true;
494
-
495
- pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
496
- STATS_INC(b->stats.unlock_fail[is_2m_pages]);
497
- return false;
498
-}
499
-
500
-static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
501
-{
502
- if (is_2m_page)
503
- return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
504
-
505
- return alloc_page(flags);
506
-}
507
-
508
-static void vmballoon_free_page(struct page *page, bool is_2m_page)
509
-{
510
- if (is_2m_page)
511
- __free_pages(page, VMW_BALLOON_2M_SHIFT);
512
- else
513
- __free_page(page);
514
-}
515
-
516
-/*
517
- * Quickly release all pages allocated for the balloon. This function is
518
- * called when host decides to "reset" balloon for one reason or another.
519
- * Unlike normal "deflate" we do not (shall not) notify host of the pages
520
- * being released.
521
- */
522
-static void vmballoon_pop(struct vmballoon *b)
523
-{
524
- struct page *page, *next;
525
- unsigned is_2m_pages;
526
-
527
- for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
528
- is_2m_pages++) {
529
- struct vmballoon_page_size *page_size =
530
- &b->page_sizes[is_2m_pages];
531
- u16 size_per_page = vmballoon_page_size(is_2m_pages);
532
-
533
- list_for_each_entry_safe(page, next, &page_size->pages, lru) {
534
- list_del(&page->lru);
535
- vmballoon_free_page(page, is_2m_pages);
536
- STATS_INC(b->stats.free[is_2m_pages]);
537
- b->size -= size_per_page;
538
- cond_resched();
539
- }
540
- }
541
-
542
- /* Clearing the batch_page unconditionally has no adverse effect */
543
- free_page((unsigned long)b->batch_page);
544
- b->batch_page = NULL;
545
-}
546
-
547
-/*
548
- * Notify the host of a ballooned page. If host rejects the page put it on the
549
- * refuse list, those refused page are then released at the end of the
550
- * inflation cycle.
551
- */
552
-static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
553
- bool is_2m_pages, unsigned int *target)
554
-{
555
- int locked, hv_status;
556
- struct page *page = b->page;
557
- struct vmballoon_page_size *page_size = &b->page_sizes[false];
558
-
559
- /* is_2m_pages can never happen as 2m pages support implies batching */
560
-
561
- locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
562
- target);
563
- if (locked) {
564
- STATS_INC(b->stats.refused_alloc[false]);
565
-
566
- if (locked == -EIO &&
567
- (hv_status == VMW_BALLOON_ERROR_RESET ||
568
- hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
569
- vmballoon_free_page(page, false);
570
- return -EIO;
571
- }
888
+ status = vmballoon_status_page(b, i, &page);
572889
573890 /*
574
- * Place page on the list of non-balloonable pages
575
- * and retry allocation, unless we already accumulated
576
- * too many of them, in which case take a breather.
891
+ * Failure of the whole batch overrides a single operation
892
+ * results.
577893 */
578
- if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
579
- page_size->n_refused_pages++;
580
- list_add(&page->lru, &page_size->refused_pages);
581
- } else {
582
- vmballoon_free_page(page, false);
583
- }
584
- return locked;
894
+ if (batch_status != VMW_BALLOON_SUCCESS)
895
+ status = batch_status;
896
+
897
+ /* Continue if no error happened */
898
+ if (!vmballoon_handle_one_result(b, page, ctl->page_size,
899
+ status))
900
+ continue;
901
+
902
+ /*
903
+ * Error happened. Move the pages to the refused list and update
904
+ * the pages number.
905
+ */
906
+ list_move(&page->lru, &ctl->refused_pages);
907
+ ctl->n_pages--;
908
+ ctl->n_refused_pages++;
585909 }
586910
587
- /* track allocated page */
588
- list_add(&page->lru, &page_size->pages);
911
+ spin_unlock(&b->comm_lock);
589912
590
- /* update balloon size */
591
- b->size++;
592
-
593
- return 0;
913
+ return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
594914 }
595915
596
-static int vmballoon_lock_batched_page(struct vmballoon *b,
597
- unsigned int num_pages, bool is_2m_pages, unsigned int *target)
598
-{
599
- int locked, i;
600
- u16 size_per_page = vmballoon_page_size(is_2m_pages);
601
-
602
- locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
603
- target);
604
- if (locked > 0) {
605
- for (i = 0; i < num_pages; i++) {
606
- u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
607
- struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
608
-
609
- vmballoon_free_page(p, is_2m_pages);
610
- }
611
-
612
- return -EIO;
613
- }
614
-
615
- for (i = 0; i < num_pages; i++) {
616
- u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
617
- struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
618
- struct vmballoon_page_size *page_size =
619
- &b->page_sizes[is_2m_pages];
620
-
621
- locked = vmballoon_batch_get_status(b->batch_page, i);
622
-
623
- switch (locked) {
624
- case VMW_BALLOON_SUCCESS:
625
- list_add(&p->lru, &page_size->pages);
626
- b->size += size_per_page;
627
- break;
628
- case VMW_BALLOON_ERROR_PPN_PINNED:
629
- case VMW_BALLOON_ERROR_PPN_INVALID:
630
- if (page_size->n_refused_pages
631
- < VMW_BALLOON_MAX_REFUSED) {
632
- list_add(&p->lru, &page_size->refused_pages);
633
- page_size->n_refused_pages++;
634
- break;
635
- }
636
- /* Fallthrough */
637
- case VMW_BALLOON_ERROR_RESET:
638
- case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
639
- vmballoon_free_page(p, is_2m_pages);
640
- break;
641
- default:
642
- /* This should never happen */
643
- WARN_ON_ONCE(true);
644
- }
645
- }
646
-
647
- return 0;
648
-}
649
-
650
-/*
651
- * Release the page allocated for the balloon. Note that we first notify
652
- * the host so it can make sure the page will be available for the guest
653
- * to use, if needed.
916
+/**
917
+ * vmballoon_release_page_list() - Releases a page list
918
+ *
919
+ * @page_list: list of pages to release.
920
+ * @n_pages: pointer to the number of pages.
921
+ * @page_size: whether the pages in the list are 2MB (or else 4KB).
922
+ *
923
+ * Releases the list of pages and zeros the number of pages.
654924 */
655
-static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
656
- bool is_2m_pages, unsigned int *target)
925
+static void vmballoon_release_page_list(struct list_head *page_list,
926
+ int *n_pages,
927
+ enum vmballoon_page_size_type page_size)
657928 {
658
- struct page *page = b->page;
659
- struct vmballoon_page_size *page_size = &b->page_sizes[false];
929
+ struct page *page, *tmp;
660930
661
- /* is_2m_pages can never happen as 2m pages support implies batching */
662
-
663
- if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
664
- list_add(&page->lru, &page_size->pages);
665
- return -EIO;
931
+ list_for_each_entry_safe(page, tmp, page_list, lru) {
932
+ list_del(&page->lru);
933
+ __free_pages(page, vmballoon_page_order(page_size));
666934 }
667935
668
- /* deallocate page */
669
- vmballoon_free_page(page, false);
670
- STATS_INC(b->stats.free[false]);
671
-
672
- /* update balloon size */
673
- b->size--;
674
-
675
- return 0;
936
+ if (n_pages)
937
+ *n_pages = 0;
676938 }
677939
678
-static int vmballoon_unlock_batched_page(struct vmballoon *b,
679
- unsigned int num_pages, bool is_2m_pages,
680
- unsigned int *target)
681
-{
682
- int locked, i, ret = 0;
683
- bool hv_success;
684
- u16 size_per_page = vmballoon_page_size(is_2m_pages);
685
-
686
- hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
687
- target);
688
- if (!hv_success)
689
- ret = -EIO;
690
-
691
- for (i = 0; i < num_pages; i++) {
692
- u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
693
- struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
694
- struct vmballoon_page_size *page_size =
695
- &b->page_sizes[is_2m_pages];
696
-
697
- locked = vmballoon_batch_get_status(b->batch_page, i);
698
- if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
699
- /*
700
- * That page wasn't successfully unlocked by the
701
- * hypervisor, re-add it to the list of pages owned by
702
- * the balloon driver.
703
- */
704
- list_add(&p->lru, &page_size->pages);
705
- } else {
706
- /* deallocate page */
707
- vmballoon_free_page(p, is_2m_pages);
708
- STATS_INC(b->stats.free[is_2m_pages]);
709
-
710
- /* update balloon size */
711
- b->size -= size_per_page;
712
- }
713
- }
714
-
715
- return ret;
716
-}
717940
718941 /*
719942 * Release pages that were allocated while attempting to inflate the
720943 * balloon but were refused by the host for one reason or another.
721944 */
722945 static void vmballoon_release_refused_pages(struct vmballoon *b,
723
- bool is_2m_pages)
946
+ struct vmballoon_ctl *ctl)
724947 {
725
- struct page *page, *next;
726
- struct vmballoon_page_size *page_size =
727
- &b->page_sizes[is_2m_pages];
948
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
949
+ ctl->page_size);
728950
729
- list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
730
- list_del(&page->lru);
731
- vmballoon_free_page(page, is_2m_pages);
732
- STATS_INC(b->stats.refused_free[is_2m_pages]);
951
+ vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
952
+ ctl->page_size);
953
+}
954
+
955
+/**
956
+ * vmballoon_change - retrieve the required balloon change
957
+ *
958
+ * @b: pointer for the balloon.
959
+ *
960
+ * Return: the required change for the balloon size. A positive number
961
+ * indicates inflation, a negative number indicates a deflation.
962
+ */
963
+static int64_t vmballoon_change(struct vmballoon *b)
964
+{
965
+ int64_t size, target;
966
+
967
+ size = atomic64_read(&b->size);
968
+ target = READ_ONCE(b->target);
969
+
970
+ /*
971
+ * We must cast first because of int sizes
972
+ * Otherwise we might get huge positives instead of negatives
973
+ */
974
+
975
+ if (b->reset_required)
976
+ return 0;
977
+
978
+ /* consider a 2MB slack on deflate, unless the balloon is emptied */
979
+ if (target < size && target != 0 &&
980
+ size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
981
+ return 0;
982
+
983
+ /* If an out-of-memory recently occurred, inflation is disallowed. */
984
+ if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
985
+ return 0;
986
+
987
+ return target - size;
988
+}
989
+
990
+/**
991
+ * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
992
+ *
993
+ * @b: pointer to balloon.
994
+ * @pages: list of pages to enqueue.
995
+ * @n_pages: pointer to number of pages in list. The value is zeroed.
996
+ * @page_size: whether the pages are 2MB or 4KB pages.
997
+ *
998
+ * Enqueues the provides list of pages in the ballooned page list, clears the
999
+ * list and zeroes the number of pages that was provided.
1000
+ */
1001
+static void vmballoon_enqueue_page_list(struct vmballoon *b,
1002
+ struct list_head *pages,
1003
+ unsigned int *n_pages,
1004
+ enum vmballoon_page_size_type page_size)
1005
+{
1006
+ unsigned long flags;
1007
+ struct page *page;
1008
+
1009
+ if (page_size == VMW_BALLOON_4K_PAGE) {
1010
+ balloon_page_list_enqueue(&b->b_dev_info, pages);
1011
+ } else {
1012
+ /*
1013
+ * Keep the huge pages in a local list which is not available
1014
+ * for the balloon compaction mechanism.
1015
+ */
1016
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
1017
+
1018
+ list_for_each_entry(page, pages, lru) {
1019
+ vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
1020
+ }
1021
+
1022
+ list_splice_init(pages, &b->huge_pages);
1023
+ __count_vm_events(BALLOON_INFLATE, *n_pages *
1024
+ vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
1025
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
7331026 }
7341027
735
- page_size->n_refused_pages = 0;
1028
+ *n_pages = 0;
7361029 }
7371030
738
-static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
1031
+/**
1032
+ * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
1033
+ *
1034
+ * @b: pointer to balloon.
1035
+ * @pages: list of pages to enqueue.
1036
+ * @n_pages: pointer to number of pages in list. The value is zeroed.
1037
+ * @page_size: whether the pages are 2MB or 4KB pages.
1038
+ * @n_req_pages: the number of requested pages.
1039
+ *
1040
+ * Dequeues the number of requested pages from the balloon for deflation. The
1041
+ * number of dequeued pages may be lower, if not enough pages in the requested
1042
+ * size are available.
1043
+ */
1044
+static void vmballoon_dequeue_page_list(struct vmballoon *b,
1045
+ struct list_head *pages,
1046
+ unsigned int *n_pages,
1047
+ enum vmballoon_page_size_type page_size,
1048
+ unsigned int n_req_pages)
7391049 {
740
- b->page = p;
1050
+ struct page *page, *tmp;
1051
+ unsigned int i = 0;
1052
+ unsigned long flags;
1053
+
1054
+ /* In the case of 4k pages, use the compaction infrastructure */
1055
+ if (page_size == VMW_BALLOON_4K_PAGE) {
1056
+ *n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
1057
+ n_req_pages);
1058
+ return;
1059
+ }
1060
+
1061
+ /* 2MB pages */
1062
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
1063
+ list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
1064
+ vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);
1065
+
1066
+ list_move(&page->lru, pages);
1067
+ if (++i == n_req_pages)
1068
+ break;
1069
+ }
1070
+
1071
+ __count_vm_events(BALLOON_DEFLATE,
1072
+ i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
1073
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
1074
+ *n_pages = i;
7411075 }
7421076
743
-static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
744
- struct page *p)
1077
+/**
1078
+ * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
1079
+ *
1080
+ * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
1081
+ * due to one or few 4KB pages. These 2MB pages may keep being allocated and
1082
+ * then being refused. To prevent this case, this function splits the refused
1083
+ * pages into 4KB pages and adds them into @prealloc_pages list.
1084
+ *
1085
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
1086
+ */
1087
+static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
7451088 {
746
- vmballoon_batch_set_pa(b->batch_page, idx,
747
- (u64)page_to_pfn(p) << PAGE_SHIFT);
1089
+ struct page *page, *tmp;
1090
+ unsigned int i, order;
1091
+
1092
+ order = vmballoon_page_order(ctl->page_size);
1093
+
1094
+ list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
1095
+ list_del(&page->lru);
1096
+ split_page(page, order);
1097
+ for (i = 0; i < (1 << order); i++)
1098
+ list_add(&page[i].lru, &ctl->prealloc_pages);
1099
+ }
1100
+ ctl->n_refused_pages = 0;
7481101 }
7491102
750
-/*
751
- * Inflate the balloon towards its target size. Note that we try to limit
752
- * the rate of allocation to make sure we are not choking the rest of the
753
- * system.
1103
+/**
1104
+ * vmballoon_inflate() - Inflate the balloon towards its target size.
1105
+ *
1106
+ * @b: pointer to the balloon.
7541107 */
7551108 static void vmballoon_inflate(struct vmballoon *b)
7561109 {
757
- unsigned int num_pages = 0;
758
- int error = 0;
759
- gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
760
- bool is_2m_pages;
1110
+ int64_t to_inflate_frames;
1111
+ struct vmballoon_ctl ctl = {
1112
+ .pages = LIST_HEAD_INIT(ctl.pages),
1113
+ .refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
1114
+ .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
1115
+ .page_size = b->max_page_size,
1116
+ .op = VMW_BALLOON_INFLATE
1117
+ };
7611118
762
- pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
1119
+ while ((to_inflate_frames = vmballoon_change(b)) > 0) {
1120
+ unsigned int to_inflate_pages, page_in_frames;
1121
+ int alloc_error, lock_error = 0;
7631122
764
- /*
765
- * First try NOSLEEP page allocations to inflate balloon.
766
- *
767
- * If we do not throttle nosleep allocations, we can drain all
768
- * free pages in the guest quickly (if the balloon target is high).
769
- * As a side-effect, draining free pages helps to inform (force)
770
- * the guest to start swapping if balloon target is not met yet,
771
- * which is a desired behavior. However, balloon driver can consume
772
- * all available CPU cycles if too many pages are allocated in a
773
- * second. Therefore, we throttle nosleep allocations even when
774
- * the guest is not under memory pressure. OTOH, if we have already
775
- * predicted that the guest is under memory pressure, then we
776
- * slowdown page allocations considerably.
777
- */
1123
+ VM_BUG_ON(!list_empty(&ctl.pages));
1124
+ VM_BUG_ON(ctl.n_pages != 0);
7781125
779
- /*
780
- * Start with no sleep allocation rate which may be higher
781
- * than sleeping allocation rate.
782
- */
783
- is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
1126
+ page_in_frames = vmballoon_page_in_frames(ctl.page_size);
7841127
785
- pr_debug("%s - goal: %d", __func__, b->target - b->size);
1128
+ to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
1129
+ DIV_ROUND_UP_ULL(to_inflate_frames,
1130
+ page_in_frames));
7861131
787
- while (!b->reset_required &&
788
- b->size + num_pages * vmballoon_page_size(is_2m_pages)
789
- < b->target) {
790
- struct page *page;
1132
+ /* Start by allocating */
1133
+ alloc_error = vmballoon_alloc_page_list(b, &ctl,
1134
+ to_inflate_pages);
7911135
792
- if (flags == VMW_PAGE_ALLOC_NOSLEEP)
793
- STATS_INC(b->stats.alloc[is_2m_pages]);
794
- else
795
- STATS_INC(b->stats.sleep_alloc);
1136
+ /* Actually lock the pages by telling the hypervisor */
1137
+ lock_error = vmballoon_lock(b, &ctl);
7961138
797
- page = vmballoon_alloc_page(flags, is_2m_pages);
798
- if (!page) {
799
- STATS_INC(b->stats.alloc_fail[is_2m_pages]);
1139
+ /*
1140
+ * If an error indicates that something serious went wrong,
1141
+ * stop the inflation.
1142
+ */
1143
+ if (lock_error)
1144
+ break;
8001145
801
- if (is_2m_pages) {
802
- b->ops->lock(b, num_pages, true, &b->target);
1146
+ /* Update the balloon size */
1147
+ atomic64_add(ctl.n_pages * page_in_frames, &b->size);
8031148
804
- /*
805
- * ignore errors from locking as we now switch
806
- * to 4k pages and we might get different
807
- * errors.
808
- */
1149
+ vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
1150
+ ctl.page_size);
8091151
810
- num_pages = 0;
811
- is_2m_pages = false;
812
- continue;
813
- }
814
-
815
- if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
816
- /*
817
- * CANSLEEP page allocation failed, so guest
818
- * is under severe memory pressure. We just log
819
- * the event, but do not stop the inflation
820
- * due to its negative impact on performance.
821
- */
822
- STATS_INC(b->stats.sleep_alloc_fail);
1152
+ /*
1153
+ * If allocation failed or the number of refused pages exceeds
1154
+ * the maximum allowed, move to the next page size.
1155
+ */
1156
+ if (alloc_error ||
1157
+ ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
1158
+ if (ctl.page_size == VMW_BALLOON_4K_PAGE)
8231159 break;
824
- }
8251160
8261161 /*
827
- * NOSLEEP page allocation failed, so the guest is
828
- * under memory pressure. Slowing down page alloctions
829
- * seems to be reasonable, but doing so might actually
830
- * cause the hypervisor to throttle us down, resulting
831
- * in degraded performance. We will count on the
832
- * scheduler and standard memory management mechanisms
833
- * for now.
1162
+			 * Split the refused pages into 4KB pages. This will
1163
+			 * also empty the refused pages list.
8341164 */
835
- flags = VMW_PAGE_ALLOC_CANSLEEP;
836
- continue;
837
- }
838
-
839
- b->ops->add_page(b, num_pages++, page);
840
- if (num_pages == b->batch_max_pages) {
841
- error = b->ops->lock(b, num_pages, is_2m_pages,
842
- &b->target);
843
- num_pages = 0;
844
- if (error)
845
- break;
1165
+ vmballoon_split_refused_pages(&ctl);
1166
+ ctl.page_size--;
8461167 }
8471168
8481169 cond_resched();
8491170 }
8501171
851
- if (num_pages > 0)
852
- b->ops->lock(b, num_pages, is_2m_pages, &b->target);
1172
+ /*
1173
+ * Release pages that were allocated while attempting to inflate the
1174
+ * balloon but were refused by the host for one reason or another,
1175
+ * and update the statistics.
1176
+ */
1177
+ if (ctl.n_refused_pages != 0)
1178
+ vmballoon_release_refused_pages(b, &ctl);
8531179
854
- vmballoon_release_refused_pages(b, true);
855
- vmballoon_release_refused_pages(b, false);
1180
+ vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size);
8561181 }
8571182
858
-/*
1183
+/**
1184
+ * vmballoon_deflate() - Decrease the size of the balloon.
1185
+ *
1186
+ * @b: pointer to the balloon
1187
+ * @n_frames: the number of frames to deflate. If zero, automatically
1188
+ * calculated according to the target size.
1189
+ * @coordinated: whether to coordinate with the host
1190
+ *
8591191 * Decrease the size of the balloon allowing guest to use more memory.
1192
+ *
1193
+ * Return: The number of deflated frames (i.e., basic page size units)
8601194 */
861
-static void vmballoon_deflate(struct vmballoon *b)
1195
+static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
1196
+ bool coordinated)
8621197 {
863
- unsigned is_2m_pages;
864
-
865
- pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
1198
+ unsigned long deflated_frames = 0;
1199
+ unsigned long tried_frames = 0;
1200
+ struct vmballoon_ctl ctl = {
1201
+ .pages = LIST_HEAD_INIT(ctl.pages),
1202
+ .refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
1203
+ .page_size = VMW_BALLOON_4K_PAGE,
1204
+ .op = VMW_BALLOON_DEFLATE
1205
+ };
8661206
8671207 /* free pages to reach target */
868
- for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
869
- is_2m_pages++) {
870
- struct page *page, *next;
871
- unsigned int num_pages = 0;
872
- struct vmballoon_page_size *page_size =
873
- &b->page_sizes[is_2m_pages];
1208
+ while (true) {
1209
+ unsigned int to_deflate_pages, n_unlocked_frames;
1210
+ unsigned int page_in_frames;
1211
+ int64_t to_deflate_frames;
1212
+ bool deflated_all;
8741213
875
- list_for_each_entry_safe(page, next, &page_size->pages, lru) {
876
- if (b->reset_required ||
877
- (b->target > 0 &&
878
- b->size - num_pages
879
- * vmballoon_page_size(is_2m_pages)
880
- < b->target + vmballoon_page_size(true)))
1214
+ page_in_frames = vmballoon_page_in_frames(ctl.page_size);
1215
+
1216
+ VM_BUG_ON(!list_empty(&ctl.pages));
1217
+ VM_BUG_ON(ctl.n_pages);
1218
+ VM_BUG_ON(!list_empty(&ctl.refused_pages));
1219
+ VM_BUG_ON(ctl.n_refused_pages);
1220
+
1221
+ /*
1222
+		 * If a specific number of frames was requested, we try to
1223
+		 * deflate that many frames. Otherwise, deflation is
1224
+		 * performed according to the target and balloon size.
1225
+ */
1226
+ to_deflate_frames = n_frames ? n_frames - tried_frames :
1227
+ -vmballoon_change(b);
1228
+
1229
+ /* break if no work to do */
1230
+ if (to_deflate_frames <= 0)
1231
+ break;
1232
+
1233
+ /*
1234
+		 * Calculate the number of frames based on the current page
1235
+		 * size, but limit the deflated frames to a single chunk.
1236
+ */
1237
+ to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
1238
+ DIV_ROUND_UP_ULL(to_deflate_frames,
1239
+ page_in_frames));
1240
+
1241
+		/* First take the pages from the balloon's page list. */
1242
+ vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
1243
+ ctl.page_size, to_deflate_pages);
1244
+
1245
+ /*
1246
+ * Before pages are moving to the refused list, count their
1247
+ * frames as frames that we tried to deflate.
1248
+ */
1249
+ tried_frames += ctl.n_pages * page_in_frames;
1250
+
1251
+ /*
1252
+ * Unlock the pages by communicating with the hypervisor if the
1253
+ * communication is coordinated (i.e., not pop). We ignore the
1254
+ * return code. Instead we check if all the pages we manage to
1255
+ * unlock all the pages. If we failed, we will move to the next
1256
+ * page size, and would eventually try again later.
1257
+ */
1258
+ if (coordinated)
1259
+ vmballoon_lock(b, &ctl);
1260
+
1261
+ /*
1262
+ * Check if we deflated enough. We will move to the next page
1263
+ * size if we did not manage to do so. This calculation takes
1264
+ * place now, as once the pages are released, the number of
1265
+ * pages is zeroed.
1266
+ */
1267
+ deflated_all = (ctl.n_pages == to_deflate_pages);
1268
+
1269
+ /* Update local and global counters */
1270
+ n_unlocked_frames = ctl.n_pages * page_in_frames;
1271
+ atomic64_sub(n_unlocked_frames, &b->size);
1272
+ deflated_frames += n_unlocked_frames;
1273
+
1274
+ vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
1275
+ ctl.page_size, ctl.n_pages);
1276
+
1277
+ /* free the ballooned pages */
1278
+ vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
1279
+ ctl.page_size);
1280
+
1281
+ /* Return the refused pages to the ballooned list. */
1282
+ vmballoon_enqueue_page_list(b, &ctl.refused_pages,
1283
+ &ctl.n_refused_pages,
1284
+ ctl.page_size);
1285
+
1286
+		/* If we failed to unlock all the pages, move to the next size. */
1287
+ if (!deflated_all) {
1288
+ if (ctl.page_size == b->max_page_size)
8811289 break;
882
-
883
- list_del(&page->lru);
884
- b->ops->add_page(b, num_pages++, page);
885
-
886
- if (num_pages == b->batch_max_pages) {
887
- int error;
888
-
889
- error = b->ops->unlock(b, num_pages,
890
- is_2m_pages, &b->target);
891
- num_pages = 0;
892
- if (error)
893
- return;
894
- }
895
-
896
- cond_resched();
1290
+ ctl.page_size++;
8971291 }
8981292
899
- if (num_pages > 0)
900
- b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
1293
+ cond_resched();
9011294 }
1295
+
1296
+ return deflated_frames;
9021297 }
9031298
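The three modes the prototype above allows are easiest to see from its call sites, all of which appear elsewhere in this patch; a condensed sketch (not actual driver code):

static void example_deflate_modes(struct vmballoon *b,
				  struct shrink_control *sc)
{
	/* Periodic worker: n_frames == 0, deflate toward the target. */
	vmballoon_deflate(b, 0, true);

	/* Shrinker: deflate exactly the number of frames asked for. */
	vmballoon_deflate(b, sc->nr_to_scan, true);

	/* Reset "pop": uncoordinated, no unlock commands to the host. */
	vmballoon_deflate(b, atomic64_read(&b->size), false);
}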
904
-static const struct vmballoon_ops vmballoon_basic_ops = {
905
- .add_page = vmballoon_add_page,
906
- .lock = vmballoon_lock_page,
907
- .unlock = vmballoon_unlock_page
908
-};
1299
+/**
1300
+ * vmballoon_deinit_batching - disables batching mode.
1301
+ *
1302
+ * @b: pointer to &struct vmballoon.
1303
+ *
1304
+ * Disables batching by deallocating the page used for communication with the
1305
+ * hypervisor and disabling the static key to indicate that batching is off.
1306
+ */
1307
+static void vmballoon_deinit_batching(struct vmballoon *b)
1308
+{
1309
+ free_page((unsigned long)b->batch_page);
1310
+ b->batch_page = NULL;
1311
+ static_branch_disable(&vmw_balloon_batching);
1312
+ b->batch_max_pages = 1;
1313
+}
9091314
910
-static const struct vmballoon_ops vmballoon_batched_ops = {
911
- .add_page = vmballoon_add_batched_page,
912
- .lock = vmballoon_lock_batched_page,
913
- .unlock = vmballoon_unlock_batched_page
914
-};
915
-
916
-static bool vmballoon_init_batching(struct vmballoon *b)
1315
+/**
1316
+ * vmballoon_init_batching - enable batching mode.
1317
+ *
1318
+ * @b: pointer to &struct vmballoon.
1319
+ *
1320
+ * Enables batching, by allocating a page for communication with the hypervisor
1321
+ * and enabling the static_key to use batching.
1322
+ *
1323
+ * Return: zero on success or an appropriate error-code.
1324
+ */
1325
+static int vmballoon_init_batching(struct vmballoon *b)
9171326 {
9181327 struct page *page;
9191328
9201329 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
9211330 if (!page)
922
- return false;
1331
+ return -ENOMEM;
9231332
9241333 b->batch_page = page_address(page);
925
- return true;
1334
+ b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);
1335
+
1336
+ static_branch_enable(&vmw_balloon_batching);
1337
+
1338
+ return 0;
9261339 }
9271340
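Both helpers toggle a static key rather than testing a flag on every hypervisor call, so the hot path costs a patched jump instead of a load-and-branch. The pattern in isolation (illustrative names):

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(example_batching);

/* Slow path: flip the key when the communication mode changes. */
static void example_set_batching(bool on)
{
	if (on)
		static_branch_enable(&example_batching);
	else
		static_branch_disable(&example_batching);
}

/* Hot path: compiled as a patched jump, not a load-and-test. */
static bool example_use_batching(void)
{
	return static_branch_likely(&example_batching);
}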
9281341 /*
....@@ -932,7 +1345,7 @@
9321345 {
9331346 struct vmballoon *b = client_data;
9341347
935
- STATS_INC(b->stats.doorbell);
1348
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);
9361349
9371350 mod_delayed_work(system_freezable_wq, &b->dwork, 0);
9381351 }
....@@ -942,11 +1355,8 @@
9421355 */
9431356 static void vmballoon_vmci_cleanup(struct vmballoon *b)
9441357 {
945
- int error;
946
-
947
- VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
948
- VMCI_INVALID_ID, error);
949
- STATS_INC(b->stats.doorbell_unset);
1358
+ vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
1359
+ VMCI_INVALID_ID, VMCI_INVALID_ID);
9501360
9511361 if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
9521362 vmci_doorbell_destroy(b->vmci_doorbell);
....@@ -954,12 +1364,19 @@
9541364 }
9551365 }
9561366
957
-/*
958
- * Initialize vmci doorbell, to get notified as soon as balloon changes
1367
+/**
1368
+ * vmballoon_vmci_init - Initialize vmci doorbell.
1369
+ *
1370
+ * @b: pointer to the balloon.
1371
+ *
1372
+ * Return: zero on success or when wakeup command not supported. Error-code
1373
+ * otherwise.
1374
+ *
1375
+ * Initialize vmci doorbell, to get notified as soon as balloon changes.
9591376 */
9601377 static int vmballoon_vmci_init(struct vmballoon *b)
9611378 {
962
- unsigned long error, dummy;
1379
+ unsigned long error;
9631380
9641381 if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
9651382 return 0;
....@@ -971,10 +1388,9 @@
9711388 if (error != VMCI_SUCCESS)
9721389 goto fail;
9731390
974
- error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context,
975
- b->vmci_doorbell.resource, dummy);
976
-
977
- STATS_INC(b->stats.doorbell_set);
1391
+ error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
1392
+ b->vmci_doorbell.context,
1393
+ b->vmci_doorbell.resource, NULL);
9781394
9791395 if (error != VMW_BALLOON_SUCCESS)
9801396 goto fail;
....@@ -983,6 +1399,23 @@
9831399 fail:
9841400 vmballoon_vmci_cleanup(b);
9851401 return -EIO;
1402
+}
1403
+
1404
+/**
1405
+ * vmballoon_pop - Quickly release all pages allocated for the balloon.
1406
+ *
1407
+ * @b: pointer to the balloon.
1408
+ *
1409
+ * This function is called when the host decides to "reset" the balloon for one
1410
+ * reason or another. Unlike a normal "deflate" we do not (and shall not)
1411
+ * notify the host of the pages being released.
1412
+ */
1413
+static void vmballoon_pop(struct vmballoon *b)
1414
+{
1415
+ unsigned long size;
1416
+
1417
+ while ((size = atomic64_read(&b->size)))
1418
+ vmballoon_deflate(b, size, false);
9861419 }
9871420
9881421 /*
....@@ -994,18 +1427,18 @@
9941427 {
9951428 int error;
9961429
1430
+ down_write(&b->conf_sem);
1431
+
9971432 vmballoon_vmci_cleanup(b);
9981433
9991434 /* free all pages, skipping monitor unlock */
10001435 vmballoon_pop(b);
10011436
1002
- if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
1003
- return;
1437
+ if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
1438
+ goto unlock;
10041439
10051440 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
1006
- b->ops = &vmballoon_batched_ops;
1007
- b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
1008
- if (!vmballoon_init_batching(b)) {
1441
+ if (vmballoon_init_batching(b)) {
10091442 /*
10101443 * We failed to initialize batching, inform the monitor
10111444 * about it by sending a null capability.
....@@ -1013,48 +1446,66 @@
10131446 * The guest will retry in one second.
10141447 */
10151448 vmballoon_send_start(b, 0);
1016
- return;
1449
+ goto unlock;
10171450 }
10181451 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
1019
- b->ops = &vmballoon_basic_ops;
1020
- b->batch_max_pages = 1;
1452
+ vmballoon_deinit_batching(b);
10211453 }
10221454
1455
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
10231456 b->reset_required = false;
10241457
10251458 error = vmballoon_vmci_init(b);
10261459 if (error)
10271460 pr_err("failed to initialize vmci doorbell\n");
10281461
1029
- if (!vmballoon_send_guest_id(b))
1462
+ if (vmballoon_send_guest_id(b))
10301463 pr_err("failed to send guest ID to the host\n");
1464
+
1465
+unlock:
1466
+ up_write(&b->conf_sem);
10311467 }
10321468
1033
-/*
1034
- * Balloon work function: reset protocol, if needed, get the new size and
1035
- * adjust balloon as needed. Repeat in 1 sec.
1469
+/**
1470
+ * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
1471
+ *
1472
+ * @work: pointer to the &work_struct which is provided by the workqueue.
1473
+ *
1474
+ * Resets the protocol if needed, gets the new size and adjusts the balloon
1475
+ * as needed. Repeats every second.
10361476 */
10371477 static void vmballoon_work(struct work_struct *work)
10381478 {
10391479 struct delayed_work *dwork = to_delayed_work(work);
10401480 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
1041
- unsigned int target;
1042
-
1043
- STATS_INC(b->stats.timer);
1481
+ int64_t change = 0;
10441482
10451483 if (b->reset_required)
10461484 vmballoon_reset(b);
10471485
1048
- if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
1049
- /* update target, adjust size */
1050
- b->target = target;
1486
+ down_read(&b->conf_sem);
10511487
1052
- if (b->size < target)
1488
+ /*
1489
+ * Update the stats while holding the semaphore to ensure that
1490
+ * @stats_enabled is consistent with whether the stats are actually
1491
+	 * enabled.
1492
+ */
1493
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);
1494
+
1495
+ if (!vmballoon_send_get_target(b))
1496
+ change = vmballoon_change(b);
1497
+
1498
+ if (change != 0) {
1499
+ pr_debug("%s - size: %llu, target %lu\n", __func__,
1500
+ atomic64_read(&b->size), READ_ONCE(b->target));
1501
+
1502
+ if (change > 0)
10531503 vmballoon_inflate(b);
1054
- else if (target == 0 ||
1055
- b->size > target + vmballoon_page_size(true))
1056
- vmballoon_deflate(b);
1504
+ else /* (change < 0) */
1505
+ vmballoon_deflate(b, 0, true);
10571506 }
1507
+
1508
+ up_read(&b->conf_sem);
10581509
10591510 /*
10601511 * We are using a freezable workqueue so that balloon operations are
....@@ -1062,6 +1513,91 @@
10621513 */
10631514 queue_delayed_work(system_freezable_wq,
10641515 dwork, round_jiffies_relative(HZ));
1516
+
1517
+}
1518
+
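The worker is a self-rearming delayed work item on the freezable workqueue; stripped of the balloon logic, the pattern looks like this (hypothetical names):

#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

static void example_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(example_dwork, example_work_fn);

static void example_work_fn(struct work_struct *work)
{
	/* ... periodic adjustment would go here ... */

	/* Re-arm for ~1s out. The freezable workqueue keeps the work
	 * quiescent across suspend; round_jiffies_relative() batches
	 * wakeups with other timers to save power.
	 */
	queue_delayed_work(system_freezable_wq, &example_dwork,
			   round_jiffies_relative(HZ));
}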
1519
+/**
1520
+ * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
1521
+ * @shrinker: pointer to the balloon shrinker.
1522
+ * @sc: page reclaim information.
1523
+ *
1524
+ * Returns: number of pages that were freed during deflation.
1525
+ */
1526
+static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
1527
+ struct shrink_control *sc)
1528
+{
1529
+ struct vmballoon *b = &balloon;
1530
+ unsigned long deflated_frames;
1531
+
1532
+ pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size));
1533
+
1534
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK);
1535
+
1536
+ /*
1537
+ * If the lock is also contended for read, we cannot easily reclaim and
1538
+ * we bail out.
1539
+ */
1540
+ if (!down_read_trylock(&b->conf_sem))
1541
+ return 0;
1542
+
1543
+ deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true);
1544
+
1545
+ vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE,
1546
+ deflated_frames);
1547
+
1548
+ /*
1549
+ * Delay future inflation for some time to mitigate the situations in
1550
+ * which balloon continuously grows and shrinks. Use WRITE_ONCE() since
1551
+ * the access is asynchronous.
1552
+ */
1553
+ WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);
1554
+
1555
+ up_read(&b->conf_sem);
1556
+
1557
+ return deflated_frames;
1558
+}
1559
+
1560
+/**
1561
+ * vmballoon_shrinker_count() - return the number of ballooned pages.
1562
+ * @shrinker: pointer to the balloon shrinker.
1563
+ * @sc: page reclaim information.
1564
+ *
1565
+ * Returns: number of 4k pages that are allocated for the balloon and can
1566
+ * therefore be reclaimed under pressure.
1567
+ */
1568
+static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
1569
+ struct shrink_control *sc)
1570
+{
1571
+ struct vmballoon *b = &balloon;
1572
+
1573
+ return atomic64_read(&b->size);
1574
+}
1575
+
1576
+static void vmballoon_unregister_shrinker(struct vmballoon *b)
1577
+{
1578
+ if (b->shrinker_registered)
1579
+ unregister_shrinker(&b->shrinker);
1580
+ b->shrinker_registered = false;
1581
+}
1582
+
1583
+static int vmballoon_register_shrinker(struct vmballoon *b)
1584
+{
1585
+ int r;
1586
+
1587
+ /* Do nothing if the shrinker is not enabled */
1588
+ if (!vmwballoon_shrinker_enable)
1589
+ return 0;
1590
+
1591
+ b->shrinker.scan_objects = vmballoon_shrinker_scan;
1592
+ b->shrinker.count_objects = vmballoon_shrinker_count;
1593
+ b->shrinker.seeks = DEFAULT_SEEKS;
1594
+
1595
+ r = register_shrinker(&b->shrinker);
1596
+
1597
+ if (r == 0)
1598
+ b->shrinker_registered = true;
1599
+
1600
+ return r;
10651601 }
10661602
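For reference, the count/scan contract these callbacks implement, reduced to a standalone skeleton (illustrative names; register_shrinker() is shown with the single-argument signature used above):

#include <linux/shrinker.h>

static unsigned long example_count(struct shrinker *s,
				   struct shrink_control *sc)
{
	/* Number of objects that could be freed; 0 means "skip scan". */
	return 0;
}

static unsigned long example_scan(struct shrinker *s,
				  struct shrink_control *sc)
{
	/* Free up to sc->nr_to_scan objects and return how many were
	 * freed, or SHRINK_STOP if no progress can be made right now.
	 */
	return SHRINK_STOP;
}

static struct shrinker example_shrinker = {
	.count_objects	= example_count,
	.scan_objects	= example_scan,
	.seeks		= DEFAULT_SEEKS,
};

/* register_shrinker(&example_shrinker) on init,
 * unregister_shrinker(&example_shrinker) on teardown.
 */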
10671603 /*
....@@ -1069,106 +1605,126 @@
10691605 */
10701606 #ifdef CONFIG_DEBUG_FS
10711607
1608
+static const char * const vmballoon_stat_page_names[] = {
1609
+ [VMW_BALLOON_PAGE_STAT_ALLOC] = "alloc",
1610
+ [VMW_BALLOON_PAGE_STAT_ALLOC_FAIL] = "allocFail",
1611
+ [VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC] = "errAlloc",
1612
+ [VMW_BALLOON_PAGE_STAT_REFUSED_FREE] = "errFree",
1613
+ [VMW_BALLOON_PAGE_STAT_FREE] = "free"
1614
+};
1615
+
1616
+static const char * const vmballoon_stat_names[] = {
1617
+ [VMW_BALLOON_STAT_TIMER] = "timer",
1618
+ [VMW_BALLOON_STAT_DOORBELL] = "doorbell",
1619
+ [VMW_BALLOON_STAT_RESET] = "reset",
1620
+ [VMW_BALLOON_STAT_SHRINK] = "shrink",
1621
+ [VMW_BALLOON_STAT_SHRINK_FREE] = "shrinkFree"
1622
+};
1623
+
1624
+static int vmballoon_enable_stats(struct vmballoon *b)
1625
+{
1626
+ int r = 0;
1627
+
1628
+ down_write(&b->conf_sem);
1629
+
1630
+ /* did we somehow race with another reader which enabled stats? */
1631
+ if (b->stats)
1632
+ goto out;
1633
+
1634
+ b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);
1635
+
1636
+ if (!b->stats) {
1637
+ /* allocation failed */
1638
+ r = -ENOMEM;
1639
+ goto out;
1640
+ }
1641
+ static_key_enable(&balloon_stat_enabled.key);
1642
+out:
1643
+ up_write(&b->conf_sem);
1644
+ return r;
1645
+}
1646
+
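vmballoon_enable_stats() is a double-checked lazy initializer: the debugfs reader tests b->stats locklessly, and the test is repeated under the write lock before allocating. The bare pattern (illustrative names):

#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_stats { u64 counter; };

static DECLARE_RWSEM(example_sem);
static struct example_stats *example_stats;

static int example_enable_stats(void)
{
	int r = 0;

	down_write(&example_sem);

	/* Re-check under the lock: a racing caller may have won. */
	if (example_stats)
		goto out;

	example_stats = kzalloc(sizeof(*example_stats), GFP_KERNEL);
	if (!example_stats)
		r = -ENOMEM;
out:
	up_write(&example_sem);
	return r;
}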
1647
+/**
1648
+ * vmballoon_debug_show - shows statistics of balloon operations.
1649
+ * @f: pointer to the &struct seq_file.
1650
+ * @offset: ignored.
1651
+ *
1652
+ * Provides the statistics that can be accessed via the vmmemctl debugfs file.
1653
+ * To avoid the overhead (mainly memory) of collecting the statistics, we only
1654
+ * start collecting them after the first time the counters are read.
1655
+ *
1656
+ * Return: zero on success or an error code.
1657
+ */
10721658 static int vmballoon_debug_show(struct seq_file *f, void *offset)
10731659 {
10741660 struct vmballoon *b = f->private;
1075
- struct vmballoon_stats *stats = &b->stats;
1661
+ int i, j;
1662
+
1663
+ /* enables stats if they are disabled */
1664
+ if (!b->stats) {
1665
+ int r = vmballoon_enable_stats(b);
1666
+
1667
+ if (r)
1668
+ return r;
1669
+ }
10761670
10771671 /* format capabilities info */
1078
- seq_printf(f,
1079
- "balloon capabilities: %#4x\n"
1080
- "used capabilities: %#4lx\n"
1081
- "is resetting: %c\n",
1082
- VMW_BALLOON_CAPABILITIES, b->capabilities,
1083
- b->reset_required ? 'y' : 'n');
1672
+ seq_printf(f, "%-22s: %#16x\n", "balloon capabilities",
1673
+ VMW_BALLOON_CAPABILITIES);
1674
+ seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities);
1675
+ seq_printf(f, "%-22s: %16s\n", "is resetting",
1676
+ b->reset_required ? "y" : "n");
10841677
10851678 /* format size info */
1086
- seq_printf(f,
1087
- "target: %8d pages\n"
1088
- "current: %8d pages\n",
1089
- b->target, b->size);
1679
+ seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target));
1680
+ seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size));
10901681
1091
- seq_printf(f,
1092
- "\n"
1093
- "timer: %8u\n"
1094
- "doorbell: %8u\n"
1095
- "start: %8u (%4u failed)\n"
1096
- "guestType: %8u (%4u failed)\n"
1097
- "2m-lock: %8u (%4u failed)\n"
1098
- "lock: %8u (%4u failed)\n"
1099
- "2m-unlock: %8u (%4u failed)\n"
1100
- "unlock: %8u (%4u failed)\n"
1101
- "target: %8u (%4u failed)\n"
1102
- "prim2mAlloc: %8u (%4u failed)\n"
1103
- "primNoSleepAlloc: %8u (%4u failed)\n"
1104
- "primCanSleepAlloc: %8u (%4u failed)\n"
1105
- "prim2mFree: %8u\n"
1106
- "primFree: %8u\n"
1107
- "err2mAlloc: %8u\n"
1108
- "errAlloc: %8u\n"
1109
- "err2mFree: %8u\n"
1110
- "errFree: %8u\n"
1111
- "doorbellSet: %8u\n"
1112
- "doorbellUnset: %8u\n",
1113
- stats->timer,
1114
- stats->doorbell,
1115
- stats->start, stats->start_fail,
1116
- stats->guest_type, stats->guest_type_fail,
1117
- stats->lock[true], stats->lock_fail[true],
1118
- stats->lock[false], stats->lock_fail[false],
1119
- stats->unlock[true], stats->unlock_fail[true],
1120
- stats->unlock[false], stats->unlock_fail[false],
1121
- stats->target, stats->target_fail,
1122
- stats->alloc[true], stats->alloc_fail[true],
1123
- stats->alloc[false], stats->alloc_fail[false],
1124
- stats->sleep_alloc, stats->sleep_alloc_fail,
1125
- stats->free[true],
1126
- stats->free[false],
1127
- stats->refused_alloc[true], stats->refused_alloc[false],
1128
- stats->refused_free[true], stats->refused_free[false],
1129
- stats->doorbell_set, stats->doorbell_unset);
1682
+ for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
1683
+ if (vmballoon_cmd_names[i] == NULL)
1684
+ continue;
11301685
1131
- return 0;
1132
-}
1686
+ seq_printf(f, "%-22s: %16llu (%llu failed)\n",
1687
+ vmballoon_cmd_names[i],
1688
+ atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
1689
+ atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
1690
+ }
11331691
1134
-static int vmballoon_debug_open(struct inode *inode, struct file *file)
1135
-{
1136
- return single_open(file, vmballoon_debug_show, inode->i_private);
1137
-}
1692
+ for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
1693
+ seq_printf(f, "%-22s: %16llu\n",
1694
+ vmballoon_stat_names[i],
1695
+ atomic64_read(&b->stats->general_stat[i]));
11381696
1139
-static const struct file_operations vmballoon_debug_fops = {
1140
- .owner = THIS_MODULE,
1141
- .open = vmballoon_debug_open,
1142
- .read = seq_read,
1143
- .llseek = seq_lseek,
1144
- .release = single_release,
1145
-};
1146
-
1147
-static int __init vmballoon_debugfs_init(struct vmballoon *b)
1148
-{
1149
- int error;
1150
-
1151
- b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
1152
- &vmballoon_debug_fops);
1153
- if (IS_ERR(b->dbg_entry)) {
1154
- error = PTR_ERR(b->dbg_entry);
1155
- pr_err("failed to create debugfs entry, error: %d\n", error);
1156
- return error;
1697
+ for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
1698
+ for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
1699
+ seq_printf(f, "%-18s(%s): %16llu\n",
1700
+ vmballoon_stat_page_names[i],
1701
+ vmballoon_page_size_names[j],
1702
+ atomic64_read(&b->stats->page_stat[i][j]));
11571703 }
11581704
11591705 return 0;
11601706 }
11611707
1708
+DEFINE_SHOW_ATTRIBUTE(vmballoon_debug);
1709
+
1710
+static void __init vmballoon_debugfs_init(struct vmballoon *b)
1711
+{
1712
+ b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
1713
+ &vmballoon_debug_fops);
1714
+}
1715
+
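DEFINE_SHOW_ATTRIBUTE() is what allows the hand-rolled open callback and file_operations above to be deleted: given a function named <name>_show, the macro generates <name>_open and a complete <name>_fops wired through single_open(). The pattern in isolation (illustrative name):

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *f, void *offset)
{
	seq_puts(f, "example output\n");
	return 0;
}

/* Generates example_open() and example_fops for us. */
DEFINE_SHOW_ATTRIBUTE(example);

/* Usage:
 *	debugfs_create_file("example", 0444, NULL, NULL, &example_fops);
 */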
11621716 static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
11631717 {
1718
+ static_key_disable(&balloon_stat_enabled.key);
11641719 debugfs_remove(b->dbg_entry);
1720
+ kfree(b->stats);
1721
+ b->stats = NULL;
11651722 }
11661723
11671724 #else
11681725
1169
-static inline int vmballoon_debugfs_init(struct vmballoon *b)
1726
+static inline void vmballoon_debugfs_init(struct vmballoon *b)
11701727 {
1171
- return 0;
11721728 }
11731729
11741730 static inline void vmballoon_debugfs_exit(struct vmballoon *b)
....@@ -1177,10 +1733,199 @@
11771733
11781734 #endif /* CONFIG_DEBUG_FS */
11791735
1736
+
1737
+#ifdef CONFIG_BALLOON_COMPACTION
1738
+
1739
+static int vmballoon_init_fs_context(struct fs_context *fc)
1740
+{
1741
+ return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
1742
+}
1743
+
1744
+static struct file_system_type vmballoon_fs = {
1745
+ .name = "balloon-vmware",
1746
+ .init_fs_context = vmballoon_init_fs_context,
1747
+ .kill_sb = kill_anon_super,
1748
+};
1749
+
1750
+static struct vfsmount *vmballoon_mnt;
1751
+
1752
+/**
1753
+ * vmballoon_migratepage() - migrates a balloon page.
1754
+ * @b_dev_info: balloon device information descriptor.
1755
+ * @newpage: the page to which @page should be migrated.
1756
+ * @page: a ballooned page that should be migrated.
1757
+ * @mode: migration mode, ignored.
1758
+ *
1759
+ * This function is really open-coded, but that is according to the interface
1760
+ * that balloon_compaction provides.
1761
+ *
1762
+ * Return: zero on success, -EAGAIN when migration cannot be performed
1763
+ * momentarily, and -EBUSY if migration failed and should be retried
1764
+ * with that specific page.
1765
+ */
1766
+static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
1767
+ struct page *newpage, struct page *page,
1768
+ enum migrate_mode mode)
1769
+{
1770
+ unsigned long status, flags;
1771
+ struct vmballoon *b;
1772
+ int ret;
1773
+
1774
+ b = container_of(b_dev_info, struct vmballoon, b_dev_info);
1775
+
1776
+ /*
1777
+ * If the semaphore is taken, there is ongoing configuration change
1778
+ * (i.e., balloon reset), so try again.
1779
+ */
1780
+ if (!down_read_trylock(&b->conf_sem))
1781
+ return -EAGAIN;
1782
+
1783
+ spin_lock(&b->comm_lock);
1784
+ /*
1785
+ * We must start by deflating and not inflating, as otherwise the
1786
+ * hypervisor may tell us that it has enough memory and the new page is
1787
+ * not needed. Since the old page is isolated, we cannot use the list
1788
+ * interface to unlock it, as the LRU field is used for isolation.
1789
+ * Instead, we use the native interface directly.
1790
+ */
1791
+ vmballoon_add_page(b, 0, page);
1792
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
1793
+ VMW_BALLOON_DEFLATE);
1794
+
1795
+ if (status == VMW_BALLOON_SUCCESS)
1796
+ status = vmballoon_status_page(b, 0, &page);
1797
+
1798
+ /*
1799
+ * If a failure happened, let the migration mechanism know that it
1800
+ * should not retry.
1801
+ */
1802
+ if (status != VMW_BALLOON_SUCCESS) {
1803
+ spin_unlock(&b->comm_lock);
1804
+ ret = -EBUSY;
1805
+ goto out_unlock;
1806
+ }
1807
+
1808
+ /*
1809
+ * The page is isolated, so it is safe to delete it without holding
1810
+ * @pages_lock . We keep holding @comm_lock since we will need it in a
1811
+ * second.
1812
+ */
1813
+ balloon_page_delete(page);
1814
+
1815
+ put_page(page);
1816
+
1817
+ /* Inflate */
1818
+ vmballoon_add_page(b, 0, newpage);
1819
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
1820
+ VMW_BALLOON_INFLATE);
1821
+
1822
+ if (status == VMW_BALLOON_SUCCESS)
1823
+ status = vmballoon_status_page(b, 0, &newpage);
1824
+
1825
+ spin_unlock(&b->comm_lock);
1826
+
1827
+ if (status != VMW_BALLOON_SUCCESS) {
1828
+ /*
1829
+ * A failure happened. While we can deflate the page we just
1830
+ * inflated, this deflation can also encounter an error. Instead
1831
+ * we will decrease the size of the balloon to reflect the
1832
+ * change and report failure.
1833
+ */
1834
+ atomic64_dec(&b->size);
1835
+ ret = -EBUSY;
1836
+ } else {
1837
+ /*
1838
+ * Success. Take a reference for the page, and we will add it to
1839
+ * the list after acquiring the lock.
1840
+ */
1841
+ get_page(newpage);
1842
+ ret = MIGRATEPAGE_SUCCESS;
1843
+ }
1844
+
1845
+ /* Update the balloon list under the @pages_lock */
1846
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
1847
+
1848
+ /*
1849
+ * On inflation success, we already took a reference for the @newpage.
1850
+ * If we succeed just insert it to the list and update the statistics
1851
+ * under the lock.
1852
+ */
1853
+ if (ret == MIGRATEPAGE_SUCCESS) {
1854
+ balloon_page_insert(&b->b_dev_info, newpage);
1855
+ __count_vm_event(BALLOON_MIGRATE);
1856
+ }
1857
+
1858
+ /*
1859
+ * We deflated successfully, so regardless to the inflation success, we
1860
+ * need to reduce the number of isolated_pages.
1861
+ */
1862
+ b->b_dev_info.isolated_pages--;
1863
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
1864
+
1865
+out_unlock:
1866
+ up_read(&b->conf_sem);
1867
+ return ret;
1868
+}
1869
+
1870
+/**
1871
+ * vmballoon_compaction_deinit() - removes compaction related data.
1872
+ *
1873
+ * @b: pointer to the balloon.
1874
+ */
1875
+static void vmballoon_compaction_deinit(struct vmballoon *b)
1876
+{
1877
+ if (!IS_ERR(b->b_dev_info.inode))
1878
+ iput(b->b_dev_info.inode);
1879
+
1880
+ b->b_dev_info.inode = NULL;
1881
+ kern_unmount(vmballoon_mnt);
1882
+ vmballoon_mnt = NULL;
1883
+}
1884
+
1885
+/**
1886
+ * vmballoon_compaction_init() - initialized compaction for the balloon.
1887
+ *
1888
+ * @b: pointer to the balloon.
1889
+ *
1890
+ * If during the initialization a failure occurred, this function does not
1891
+ * perform cleanup. The caller must call vmballoon_compaction_deinit() in this
1892
+ * case.
1893
+ *
1894
+ * Return: zero on success or error code on failure.
1895
+ */
1896
+static __init int vmballoon_compaction_init(struct vmballoon *b)
1897
+{
1898
+ vmballoon_mnt = kern_mount(&vmballoon_fs);
1899
+ if (IS_ERR(vmballoon_mnt))
1900
+ return PTR_ERR(vmballoon_mnt);
1901
+
1902
+ b->b_dev_info.migratepage = vmballoon_migratepage;
1903
+ b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
1904
+
1905
+ if (IS_ERR(b->b_dev_info.inode))
1906
+ return PTR_ERR(b->b_dev_info.inode);
1907
+
1908
+ b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
1909
+ return 0;
1910
+}
1911
+
1912
+#else /* CONFIG_BALLOON_COMPACTION */
1913
+
1914
+static void vmballoon_compaction_deinit(struct vmballoon *b)
1915
+{
1916
+}
1917
+
1918
+static int vmballoon_compaction_init(struct vmballoon *b)
1919
+{
1920
+ return 0;
1921
+}
1922
+
1923
+#endif /* CONFIG_BALLOON_COMPACTION */
1924
+
11801925 static int __init vmballoon_init(void)
11811926 {
11821927 int error;
1183
- unsigned is_2m_pages;
1928
+
11841929 /*
11851930 * Check if we are running on VMware's hypervisor and bail out
11861931 * if we are not.
....@@ -1188,18 +1933,24 @@
11881933 if (x86_hyper_type != X86_HYPER_VMWARE)
11891934 return -ENODEV;
11901935
1191
- for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
1192
- is_2m_pages++) {
1193
- INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
1194
- INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
1195
- }
1196
-
11971936 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
11981937
1199
- error = vmballoon_debugfs_init(&balloon);
1938
+ error = vmballoon_register_shrinker(&balloon);
12001939 if (error)
1201
- return error;
1940
+ goto fail;
12021941
1942
+ /*
1943
+ * Initialization of compaction must be done after the call to
1944
+	 * balloon_devinfo_init().
1945
+ */
1946
+ balloon_devinfo_init(&balloon.b_dev_info);
1947
+ error = vmballoon_compaction_init(&balloon);
1948
+ if (error)
1949
+ goto fail;
1950
+
1951
+ INIT_LIST_HEAD(&balloon.huge_pages);
1952
+ spin_lock_init(&balloon.comm_lock);
1953
+ init_rwsem(&balloon.conf_sem);
12031954 balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
12041955 balloon.batch_page = NULL;
12051956 balloon.page = NULL;
....@@ -1207,7 +1958,13 @@
12071958
12081959 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
12091960
1961
+ vmballoon_debugfs_init(&balloon);
1962
+
12101963 return 0;
1964
+fail:
1965
+ vmballoon_unregister_shrinker(&balloon);
1966
+ vmballoon_compaction_deinit(&balloon);
1967
+ return error;
12111968 }
12121969
12131970 /*
....@@ -1220,6 +1977,7 @@
12201977
12211978 static void __exit vmballoon_exit(void)
12221979 {
1980
+ vmballoon_unregister_shrinker(&balloon);
12231981 vmballoon_vmci_cleanup(&balloon);
12241982 cancel_delayed_work_sync(&balloon.dwork);
12251983
....@@ -1232,5 +1990,8 @@
12321990 */
12331991 vmballoon_send_start(&balloon, 0);
12341992 vmballoon_pop(&balloon);
1993
+
1994
+ /* Only once we popped the balloon, compaction can be deinit */
1995
+ vmballoon_compaction_deinit(&balloon);
12351996 }
12361997 module_exit(vmballoon_exit);