hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/misc/vmw_balloon.c
....@@ -17,6 +17,7 @@
1717 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1818
1919 #include <linux/types.h>
20
+#include <linux/io.h>
2021 #include <linux/kernel.h>
2122 #include <linux/mm.h>
2223 #include <linux/vmalloc.h>
....@@ -25,35 +26,35 @@
2526 #include <linux/workqueue.h>
2627 #include <linux/debugfs.h>
2728 #include <linux/seq_file.h>
29
+#include <linux/rwsem.h>
30
+#include <linux/slab.h>
31
+#include <linux/spinlock.h>
32
+#include <linux/mount.h>
33
+#include <linux/pseudo_fs.h>
34
+#include <linux/balloon_compaction.h>
2835 #include <linux/vmw_vmci_defs.h>
2936 #include <linux/vmw_vmci_api.h>
3037 #include <asm/hypervisor.h>
3138
3239 MODULE_AUTHOR("VMware, Inc.");
3340 MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
34
-MODULE_VERSION("1.5.0.0-k");
3541 MODULE_ALIAS("dmi:*:svnVMware*:*");
3642 MODULE_ALIAS("vmware_vmmemctl");
3743 MODULE_LICENSE("GPL");
3844
39
-/*
40
- * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
41
- * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
42
- * __GFP_NOWARN, to suppress page allocation failure warnings.
43
- */
44
-#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN)
45
+static bool __read_mostly vmwballoon_shrinker_enable;
46
+module_param(vmwballoon_shrinker_enable, bool, 0444);
47
+MODULE_PARM_DESC(vmwballoon_shrinker_enable,
48
+ "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");
4549
46
-/*
47
- * Use GFP_HIGHUSER when executing in a separate kernel thread
48
- * context and allocation can sleep. This is less stressful to
49
- * the guest memory system, since it allows the thread to block
50
- * while memory is reclaimed, and won't take pages from emergency
51
- * low-memory pools.
52
- */
53
-#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER)
50
+/* Delay in seconds after shrink before inflation. */
51
+#define VMBALLOON_SHRINK_DELAY (5)
5452
5553 /* Maximum number of refused pages we accumulate during inflation cycle */
5654 #define VMW_BALLOON_MAX_REFUSED 16
55
+
56
+/* Magic number for the balloon mount-point */
57
+#define BALLOON_VMW_MAGIC 0x0ba11007
5758
5859 /*
5960 * Hypervisor communication port definitions.
....@@ -70,232 +71,468 @@
7071 VMW_BALLOON_BATCHED_CMDS = (1 << 2),
7172 VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3),
7273 VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4),
74
+ VMW_BALLOON_64_BIT_TARGET = (1 << 5)
7375 };
7476
75
-#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \
77
+#define VMW_BALLOON_CAPABILITIES_COMMON (VMW_BALLOON_BASIC_CMDS \
7678 | VMW_BALLOON_BATCHED_CMDS \
7779 | VMW_BALLOON_BATCHED_2M_CMDS \
7880 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
7981
80
-#define VMW_BALLOON_2M_SHIFT (9)
81
-#define VMW_BALLOON_NUM_PAGE_SIZES (2)
82
+#define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT)
8283
8384 /*
84
- * Backdoor commands availability:
85
- *
86
- * START, GET_TARGET and GUEST_ID are always available,
87
- *
88
- * VMW_BALLOON_BASIC_CMDS:
89
- * LOCK and UNLOCK commands,
90
- * VMW_BALLOON_BATCHED_CMDS:
91
- * BATCHED_LOCK and BATCHED_UNLOCK commands.
92
- * VMW BALLOON_BATCHED_2M_CMDS:
93
- * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
94
- * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
95
- * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
85
+ * 64-bit targets are only supported in 64-bit
9686 */
97
-#define VMW_BALLOON_CMD_START 0
98
-#define VMW_BALLOON_CMD_GET_TARGET 1
99
-#define VMW_BALLOON_CMD_LOCK 2
100
-#define VMW_BALLOON_CMD_UNLOCK 3
101
-#define VMW_BALLOON_CMD_GUEST_ID 4
102
-#define VMW_BALLOON_CMD_BATCHED_LOCK 6
103
-#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7
104
-#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8
105
-#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9
106
-#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10
87
+#ifdef CONFIG_64BIT
88
+#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_CAPABILITIES_COMMON \
89
+ | VMW_BALLOON_64_BIT_TARGET)
90
+#else
91
+#define VMW_BALLOON_CAPABILITIES VMW_BALLOON_CAPABILITIES_COMMON
92
+#endif
10793
94
+enum vmballoon_page_size_type {
95
+ VMW_BALLOON_4K_PAGE,
96
+ VMW_BALLOON_2M_PAGE,
97
+ VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
98
+};
10899
109
-/* error codes */
110
-#define VMW_BALLOON_SUCCESS 0
111
-#define VMW_BALLOON_FAILURE -1
112
-#define VMW_BALLOON_ERROR_CMD_INVALID 1
113
-#define VMW_BALLOON_ERROR_PPN_INVALID 2
114
-#define VMW_BALLOON_ERROR_PPN_LOCKED 3
115
-#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4
116
-#define VMW_BALLOON_ERROR_PPN_PINNED 5
117
-#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6
118
-#define VMW_BALLOON_ERROR_RESET 7
119
-#define VMW_BALLOON_ERROR_BUSY 8
100
+#define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1)
101
+
102
+static const char * const vmballoon_page_size_names[] = {
103
+ [VMW_BALLOON_4K_PAGE] = "4k",
104
+ [VMW_BALLOON_2M_PAGE] = "2M"
105
+};
106
+
107
+enum vmballoon_op {
108
+ VMW_BALLOON_INFLATE,
109
+ VMW_BALLOON_DEFLATE
110
+};
111
+
112
+enum vmballoon_op_stat_type {
113
+ VMW_BALLOON_OP_STAT,
114
+ VMW_BALLOON_OP_FAIL_STAT
115
+};
116
+
117
+#define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1)
118
+
119
+/**
120
+ * enum vmballoon_cmd_type - backdoor commands.
121
+ *
122
+ * Availability of the commands is as followed:
123
+ *
124
+ * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
125
+ * %VMW_BALLOON_CMD_GUEST_ID are always available.
126
+ *
127
+ * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then
128
+ * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
129
+ *
130
+ * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then
131
+ * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands
132
+ * are available.
133
+ *
134
+ * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then
135
+ * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
136
+ * are supported.
137
+ *
138
+ * If the host reports VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then
139
+ * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported.
140
+ *
141
+ * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
142
+ * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
143
+ * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
144
+ * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
145
+ * to be deflated from the balloon.
146
+ * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
147
+ * runs in the VM.
148
+ * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
149
+ * ballooned pages (up to 512).
150
+ * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
151
+ * pages that are about to be deflated from the
152
+ * balloon (up to 512).
153
+ * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
154
+ * for 2MB pages.
155
+ * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
156
+ * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
157
+ * pages.
158
+ * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
159
+ * that would be invoked when the balloon
160
+ * size changes.
161
+ * @VMW_BALLOON_CMD_LAST: Value of the last command.
162
+ */
163
+enum vmballoon_cmd_type {
164
+ VMW_BALLOON_CMD_START,
165
+ VMW_BALLOON_CMD_GET_TARGET,
166
+ VMW_BALLOON_CMD_LOCK,
167
+ VMW_BALLOON_CMD_UNLOCK,
168
+ VMW_BALLOON_CMD_GUEST_ID,
169
+ /* No command 5 */
170
+ VMW_BALLOON_CMD_BATCHED_LOCK = 6,
171
+ VMW_BALLOON_CMD_BATCHED_UNLOCK,
172
+ VMW_BALLOON_CMD_BATCHED_2M_LOCK,
173
+ VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
174
+ VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
175
+ VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
176
+};
177
+
178
+#define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1)
179
+
180
+enum vmballoon_error_codes {
181
+ VMW_BALLOON_SUCCESS,
182
+ VMW_BALLOON_ERROR_CMD_INVALID,
183
+ VMW_BALLOON_ERROR_PPN_INVALID,
184
+ VMW_BALLOON_ERROR_PPN_LOCKED,
185
+ VMW_BALLOON_ERROR_PPN_UNLOCKED,
186
+ VMW_BALLOON_ERROR_PPN_PINNED,
187
+ VMW_BALLOON_ERROR_PPN_NOTNEEDED,
188
+ VMW_BALLOON_ERROR_RESET,
189
+ VMW_BALLOON_ERROR_BUSY
190
+};
120191
121192 #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000)
122193
123
-/* Batch page description */
194
+#define VMW_BALLOON_CMD_WITH_TARGET_MASK \
195
+ ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \
196
+ (1UL << VMW_BALLOON_CMD_LOCK) | \
197
+ (1UL << VMW_BALLOON_CMD_UNLOCK) | \
198
+ (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \
199
+ (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \
200
+ (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \
201
+ (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
124202
125
-/*
126
- * Layout of a page in the batch page:
127
- *
128
- * +-------------+----------+--------+
129
- * | | | |
130
- * | Page number | Reserved | Status |
131
- * | | | |
132
- * +-------------+----------+--------+
133
- * 64 PAGE_SHIFT 6 0
134
- *
135
- * The reserved field should be set to 0.
136
- */
137
-#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64))
138
-#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1)
139
-#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1))
140
-
141
-struct vmballoon_batch_page {
142
- u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
203
+static const char * const vmballoon_cmd_names[] = {
204
+ [VMW_BALLOON_CMD_START] = "start",
205
+ [VMW_BALLOON_CMD_GET_TARGET] = "target",
206
+ [VMW_BALLOON_CMD_LOCK] = "lock",
207
+ [VMW_BALLOON_CMD_UNLOCK] = "unlock",
208
+ [VMW_BALLOON_CMD_GUEST_ID] = "guestType",
209
+ [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock",
210
+ [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock",
211
+ [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock",
212
+ [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock",
213
+ [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet"
143214 };
144215
145
-static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
146
-{
147
- return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
148
-}
149
-
150
-static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
151
- int idx)
152
-{
153
- return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
154
-}
155
-
156
-static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
157
- u64 pa)
158
-{
159
- batch->pages[idx] = pa;
160
-}
161
-
162
-
163
-#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \
164
-({ \
165
- unsigned long __status, __dummy1, __dummy2, __dummy3; \
166
- __asm__ __volatile__ ("inl %%dx" : \
167
- "=a"(__status), \
168
- "=c"(__dummy1), \
169
- "=d"(__dummy2), \
170
- "=b"(result), \
171
- "=S" (__dummy3) : \
172
- "0"(VMW_BALLOON_HV_MAGIC), \
173
- "1"(VMW_BALLOON_CMD_##cmd), \
174
- "2"(VMW_BALLOON_HV_PORT), \
175
- "3"(arg1), \
176
- "4" (arg2) : \
177
- "memory"); \
178
- if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \
179
- result = __dummy1; \
180
- result &= -1UL; \
181
- __status & -1UL; \
182
-})
183
-
184
-#ifdef CONFIG_DEBUG_FS
185
-struct vmballoon_stats {
186
- unsigned int timer;
187
- unsigned int doorbell;
188
-
189
- /* allocation statistics */
190
- unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
191
- unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
192
- unsigned int sleep_alloc;
193
- unsigned int sleep_alloc_fail;
194
- unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
195
- unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
196
- unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
197
-
198
- /* monitor operations */
199
- unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
200
- unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
201
- unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
202
- unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
203
- unsigned int target;
204
- unsigned int target_fail;
205
- unsigned int start;
206
- unsigned int start_fail;
207
- unsigned int guest_type;
208
- unsigned int guest_type_fail;
209
- unsigned int doorbell_set;
210
- unsigned int doorbell_unset;
216
+enum vmballoon_stat_page {
217
+ VMW_BALLOON_PAGE_STAT_ALLOC,
218
+ VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
219
+ VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
220
+ VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
221
+ VMW_BALLOON_PAGE_STAT_FREE,
222
+ VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
211223 };
212224
213
-#define STATS_INC(stat) (stat)++
214
-#else
215
-#define STATS_INC(stat)
216
-#endif
225
+#define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1)
217226
218
-struct vmballoon;
219
-
220
-struct vmballoon_ops {
221
- void (*add_page)(struct vmballoon *b, int idx, struct page *p);
222
- int (*lock)(struct vmballoon *b, unsigned int num_pages,
223
- bool is_2m_pages, unsigned int *target);
224
- int (*unlock)(struct vmballoon *b, unsigned int num_pages,
225
- bool is_2m_pages, unsigned int *target);
227
+enum vmballoon_stat_general {
228
+ VMW_BALLOON_STAT_TIMER,
229
+ VMW_BALLOON_STAT_DOORBELL,
230
+ VMW_BALLOON_STAT_RESET,
231
+ VMW_BALLOON_STAT_SHRINK,
232
+ VMW_BALLOON_STAT_SHRINK_FREE,
233
+ VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE
226234 };
227235
228
-struct vmballoon_page_size {
229
- /* list of reserved physical pages */
236
+#define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1)
237
+
238
+static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
239
+static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);
240
+
241
+struct vmballoon_ctl {
230242 struct list_head pages;
231
-
232
- /* transient list of non-balloonable pages */
233243 struct list_head refused_pages;
244
+ struct list_head prealloc_pages;
234245 unsigned int n_refused_pages;
246
+ unsigned int n_pages;
247
+ enum vmballoon_page_size_type page_size;
248
+ enum vmballoon_op op;
235249 };
250
+
251
+/**
252
+ * struct vmballoon_batch_entry - a batch entry for lock or unlock.
253
+ *
254
+ * @status: the status of the operation, which is written by the hypervisor.
255
+ * @reserved: reserved for future use. Must be set to zero.
256
+ * @pfn: the physical frame number of the page to be locked or unlocked.
257
+ */
258
+struct vmballoon_batch_entry {
259
+ u64 status : 5;
260
+ u64 reserved : PAGE_SHIFT - 5;
261
+ u64 pfn : 52;
262
+} __packed;
236263
237264 struct vmballoon {
238
- struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
265
+ /**
266
+ * @max_page_size: maximum supported page size for ballooning.
267
+ *
268
+ * Protected by @conf_sem
269
+ */
270
+ enum vmballoon_page_size_type max_page_size;
239271
240
- /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
241
- unsigned supported_page_sizes;
272
+ /**
273
+ * @size: balloon actual size in basic page size (frames).
274
+ *
275
+ * While we currently do not support size which is bigger than 32-bit,
276
+ * in preparation for future support, use 64-bits.
277
+ */
278
+ atomic64_t size;
242279
243
- /* balloon size in pages */
244
- unsigned int size;
245
- unsigned int target;
280
+ /**
281
+ * @target: balloon target size in basic page size (frames).
282
+ *
283
+ * We do not protect the target under the assumption that setting the
284
+ * value is always done through a single write. If this assumption ever
285
+ * breaks, we would have to use X_ONCE for accesses, and suffer the less
286
+ * optimized code. Although we may read stale target value if multiple
287
+ * accesses happen at once, the performance impact should be minor.
288
+ */
289
+ unsigned long target;
246290
247
- /* reset flag */
291
+ /**
292
+ * @reset_required: reset flag
293
+ *
294
+ * Setting this flag may introduce races, but the code is expected to
295
+ * handle them gracefully. In the worst case, another operation will
296
+ * fail as reset did not take place. Clearing the flag is done while
297
+ * holding @conf_sem for write.
298
+ */
248299 bool reset_required;
249300
301
+ /**
302
+ * @capabilities: hypervisor balloon capabilities.
303
+ *
304
+ * Protected by @conf_sem.
305
+ */
250306 unsigned long capabilities;
251307
252
- struct vmballoon_batch_page *batch_page;
308
+ /**
309
+ * @batch_page: pointer to communication batch page.
310
+ *
311
+ * When batching is used, batch_page points to a page, which holds up to
312
+ * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
313
+ */
314
+ struct vmballoon_batch_entry *batch_page;
315
+
316
+ /**
317
+ * @batch_max_pages: maximum pages that can be locked/unlocked.
318
+ *
319
+ * Indicates the number of pages that the hypervisor can lock or unlock
320
+ * at once, according to whether batching is enabled. If batching is
321
+ * disabled, only a single page can be locked/unlock on each operation.
322
+ *
323
+ * Protected by @conf_sem.
324
+ */
253325 unsigned int batch_max_pages;
326
+
327
+ /**
328
+ * @page: page to be locked/unlocked by the hypervisor
329
+ *
330
+ * @page is only used when batching is disabled and a single page is
331
+ * reclaimed on each iteration.
332
+ *
333
+ * Protected by @comm_lock.
334
+ */
254335 struct page *page;
255336
256
- const struct vmballoon_ops *ops;
337
+ /**
338
+ * @shrink_timeout: timeout until the next inflation.
339
+ *
340
+ * After an shrink event, indicates the time in jiffies after which
341
+ * inflation is allowed again. Can be written concurrently with reads,
342
+ * so must use READ_ONCE/WRITE_ONCE when accessing.
343
+ */
344
+ unsigned long shrink_timeout;
345
+
346
+ /* statistics */
347
+ struct vmballoon_stats *stats;
257348
258349 #ifdef CONFIG_DEBUG_FS
259
- /* statistics */
260
- struct vmballoon_stats stats;
261
-
262350 /* debugfs file exporting statistics */
263351 struct dentry *dbg_entry;
264352 #endif
265353
266
- struct sysinfo sysinfo;
354
+ /**
355
+ * @b_dev_info: balloon device information descriptor.
356
+ */
357
+ struct balloon_dev_info b_dev_info;
267358
268359 struct delayed_work dwork;
269360
361
+ /**
362
+ * @huge_pages - list of the inflated 2MB pages.
363
+ *
364
+ * Protected by @b_dev_info.pages_lock .
365
+ */
366
+ struct list_head huge_pages;
367
+
368
+ /**
369
+ * @vmci_doorbell.
370
+ *
371
+ * Protected by @conf_sem.
372
+ */
270373 struct vmci_handle vmci_doorbell;
374
+
375
+ /**
376
+ * @conf_sem: semaphore to protect the configuration and the statistics.
377
+ */
378
+ struct rw_semaphore conf_sem;
379
+
380
+ /**
381
+ * @comm_lock: lock to protect the communication with the host.
382
+ *
383
+ * Lock ordering: @conf_sem -> @comm_lock .
384
+ */
385
+ spinlock_t comm_lock;
386
+
387
+ /**
388
+ * @shrinker: shrinker interface that is used to avoid over-inflation.
389
+ */
390
+ struct shrinker shrinker;
391
+
392
+ /**
393
+ * @shrinker_registered: whether the shrinker was registered.
394
+ *
395
+ * The shrinker interface does not handle gracefully the removal of
396
+ * shrinker that was not registered before. This indication allows to
397
+ * simplify the unregistration process.
398
+ */
399
+ bool shrinker_registered;
271400 };
272401
273402 static struct vmballoon balloon;
403
+
404
+struct vmballoon_stats {
405
+ /* timer / doorbell operations */
406
+ atomic64_t general_stat[VMW_BALLOON_STAT_NUM];
407
+
408
+ /* allocation statistics for huge and small pages */
409
+ atomic64_t
410
+ page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];
411
+
412
+ /* Monitor operations: total operations, and failures */
413
+ atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
414
+};
415
+
416
+static inline bool is_vmballoon_stats_on(void)
417
+{
418
+ return IS_ENABLED(CONFIG_DEBUG_FS) &&
419
+ static_branch_unlikely(&balloon_stat_enabled);
420
+}
421
+
422
+static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
423
+ enum vmballoon_op_stat_type type)
424
+{
425
+ if (is_vmballoon_stats_on())
426
+ atomic64_inc(&b->stats->ops[op][type]);
427
+}
428
+
429
+static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
430
+ enum vmballoon_stat_general stat)
431
+{
432
+ if (is_vmballoon_stats_on())
433
+ atomic64_inc(&b->stats->general_stat[stat]);
434
+}
435
+
436
+static inline void vmballoon_stats_gen_add(struct vmballoon *b,
437
+ enum vmballoon_stat_general stat,
438
+ unsigned int val)
439
+{
440
+ if (is_vmballoon_stats_on())
441
+ atomic64_add(val, &b->stats->general_stat[stat]);
442
+}
443
+
444
+static inline void vmballoon_stats_page_inc(struct vmballoon *b,
445
+ enum vmballoon_stat_page stat,
446
+ enum vmballoon_page_size_type size)
447
+{
448
+ if (is_vmballoon_stats_on())
449
+ atomic64_inc(&b->stats->page_stat[stat][size]);
450
+}
451
+
452
+static inline void vmballoon_stats_page_add(struct vmballoon *b,
453
+ enum vmballoon_stat_page stat,
454
+ enum vmballoon_page_size_type size,
455
+ unsigned int val)
456
+{
457
+ if (is_vmballoon_stats_on())
458
+ atomic64_add(val, &b->stats->page_stat[stat][size]);
459
+}
460
+
461
+static inline unsigned long
462
+__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
463
+ unsigned long arg2, unsigned long *result)
464
+{
465
+ unsigned long status, dummy1, dummy2, dummy3, local_result;
466
+
467
+ vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);
468
+
469
+ asm volatile ("inl %%dx" :
470
+ "=a"(status),
471
+ "=c"(dummy1),
472
+ "=d"(dummy2),
473
+ "=b"(local_result),
474
+ "=S"(dummy3) :
475
+ "0"(VMW_BALLOON_HV_MAGIC),
476
+ "1"(cmd),
477
+ "2"(VMW_BALLOON_HV_PORT),
478
+ "3"(arg1),
479
+ "4"(arg2) :
480
+ "memory");
481
+
482
+ /* update the result if needed */
483
+ if (result)
484
+ *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
485
+ local_result;
486
+
487
+ /* update target when applicable */
488
+ if (status == VMW_BALLOON_SUCCESS &&
489
+ ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
490
+ WRITE_ONCE(b->target, local_result);
491
+
492
+ if (status != VMW_BALLOON_SUCCESS &&
493
+ status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
494
+ vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
495
+ pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
496
+ __func__, vmballoon_cmd_names[cmd], arg1, arg2,
497
+ status);
498
+ }
499
+
500
+ /* mark reset required accordingly */
501
+ if (status == VMW_BALLOON_ERROR_RESET)
502
+ b->reset_required = true;
503
+
504
+ return status;
505
+}
506
+
507
+static __always_inline unsigned long
508
+vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
509
+ unsigned long arg2)
510
+{
511
+ unsigned long dummy;
512
+
513
+ return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
514
+}
274515
275516 /*
276517 * Send "start" command to the host, communicating supported version
277518 * of the protocol.
278519 */
279
-static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
520
+static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
280521 {
281
- unsigned long status, capabilities, dummy = 0;
282
- bool success;
522
+ unsigned long status, capabilities;
283523
284
- STATS_INC(b->stats.start);
285
-
286
- status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
524
+ status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
525
+ &capabilities);
287526
288527 switch (status) {
289528 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
290529 b->capabilities = capabilities;
291
- success = true;
292530 break;
293531 case VMW_BALLOON_SUCCESS:
294532 b->capabilities = VMW_BALLOON_BASIC_CMDS;
295
- success = true;
296533 break;
297534 default:
298
- success = false;
535
+ return -EIO;
299536 }
300537
301538 /*
....@@ -303,626 +540,802 @@
303540 * reason disabled, do not use 2MB pages, since otherwise the legacy
304541 * mechanism is used with 2MB pages, causing a failure.
305542 */
543
+ b->max_page_size = VMW_BALLOON_4K_PAGE;
306544 if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
307545 (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
308
- b->supported_page_sizes = 2;
309
- else
310
- b->supported_page_sizes = 1;
546
+ b->max_page_size = VMW_BALLOON_2M_PAGE;
311547
312
- if (!success) {
313
- pr_debug("%s - failed, hv returns %ld\n", __func__, status);
314
- STATS_INC(b->stats.start_fail);
315
- }
316
- return success;
548
+
549
+ return 0;
317550 }
318551
319
-static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
320
-{
321
- switch (status) {
322
- case VMW_BALLOON_SUCCESS:
323
- return true;
324
-
325
- case VMW_BALLOON_ERROR_RESET:
326
- b->reset_required = true;
327
- /* fall through */
328
-
329
- default:
330
- return false;
331
- }
332
-}
333
-
334
-/*
552
+/**
553
+ * vmballoon_send_guest_id - communicate guest type to the host.
554
+ *
555
+ * @b: pointer to the balloon.
556
+ *
335557 * Communicate guest type to the host so that it can adjust ballooning
336558 * algorithm to the one most appropriate for the guest. This command
337559 * is normally issued after sending "start" command and is part of
338560 * standard reset sequence.
561
+ *
562
+ * Return: zero on success or appropriate error code.
339563 */
340
-static bool vmballoon_send_guest_id(struct vmballoon *b)
341
-{
342
- unsigned long status, dummy = 0;
343
-
344
- status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
345
- dummy);
346
-
347
- STATS_INC(b->stats.guest_type);
348
-
349
- if (vmballoon_check_status(b, status))
350
- return true;
351
-
352
- pr_debug("%s - failed, hv returns %ld\n", __func__, status);
353
- STATS_INC(b->stats.guest_type_fail);
354
- return false;
355
-}
356
-
357
-static u16 vmballoon_page_size(bool is_2m_page)
358
-{
359
- if (is_2m_page)
360
- return 1 << VMW_BALLOON_2M_SHIFT;
361
-
362
- return 1;
363
-}
364
-
365
-/*
366
- * Retrieve desired balloon size from the host.
367
- */
368
-static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
564
+static int vmballoon_send_guest_id(struct vmballoon *b)
369565 {
370566 unsigned long status;
371
- unsigned long target;
372
- unsigned long limit;
373
- unsigned long dummy = 0;
374
- u32 limit32;
375567
376
- /*
377
- * si_meminfo() is cheap. Moreover, we want to provide dynamic
378
- * max balloon size later. So let us call si_meminfo() every
379
- * iteration.
380
- */
381
- si_meminfo(&b->sysinfo);
382
- limit = b->sysinfo.totalram;
568
+ status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
569
+ VMW_BALLOON_GUEST_ID, 0);
383570
384
- /* Ensure limit fits in 32-bits */
385
- limit32 = (u32)limit;
386
- if (limit != limit32)
387
- return false;
388
-
389
- /* update stats */
390
- STATS_INC(b->stats.target);
391
-
392
- status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
393
- if (vmballoon_check_status(b, status)) {
394
- *new_target = target;
395
- return true;
396
- }
397
-
398
- pr_debug("%s - failed, hv returns %ld\n", __func__, status);
399
- STATS_INC(b->stats.target_fail);
400
- return false;
571
+ return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
401572 }
402573
403
-/*
404
- * Notify the host about allocated page so that host can use it without
405
- * fear that guest will need it. Host may reject some pages, we need to
406
- * check the return value and maybe submit a different page.
574
+/**
575
+ * vmballoon_page_order() - return the order of the page
576
+ * @page_size: the size of the page.
577
+ *
578
+ * Return: the allocation order.
407579 */
408
-static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
409
- unsigned int *hv_status, unsigned int *target)
580
+static inline
581
+unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
410582 {
411
- unsigned long status, dummy = 0;
412
- u32 pfn32;
583
+ return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
584
+}
413585
414
- pfn32 = (u32)pfn;
415
- if (pfn32 != pfn)
586
+/**
587
+ * vmballoon_page_in_frames() - returns the number of frames in a page.
588
+ * @page_size: the size of the page.
589
+ *
590
+ * Return: the number of 4k frames.
591
+ */
592
+static inline unsigned int
593
+vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
594
+{
595
+ return 1 << vmballoon_page_order(page_size);
596
+}
597
+
598
+/**
599
+ * vmballoon_mark_page_offline() - mark a page as offline
600
+ * @page: pointer for the page.
601
+ * @page_size: the size of the page.
602
+ */
603
+static void
604
+vmballoon_mark_page_offline(struct page *page,
605
+ enum vmballoon_page_size_type page_size)
606
+{
607
+ int i;
608
+
609
+ for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
610
+ __SetPageOffline(page + i);
611
+}
612
+
613
+/**
614
+ * vmballoon_mark_page_online() - mark a page as online
615
+ * @page: pointer for the page.
616
+ * @page_size: the size of the page.
617
+ */
618
+static void
619
+vmballoon_mark_page_online(struct page *page,
620
+ enum vmballoon_page_size_type page_size)
621
+{
622
+ int i;
623
+
624
+ for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
625
+ __ClearPageOffline(page + i);
626
+}
627
+
628
+/**
629
+ * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
630
+ *
631
+ * @b: pointer to the balloon.
632
+ *
633
+ * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required
634
+ * by the host-guest protocol and EIO if an error occurred in communicating with
635
+ * the host.
636
+ */
637
+static int vmballoon_send_get_target(struct vmballoon *b)
638
+{
639
+ unsigned long status;
640
+ unsigned long limit;
641
+
642
+ limit = totalram_pages();
643
+
644
+ /* Ensure limit fits in 32-bits if 64-bit targets are not supported */
645
+ if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) &&
646
+ limit != (u32)limit)
416647 return -EINVAL;
417648
418
- STATS_INC(b->stats.lock[false]);
649
+ status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
419650
420
- *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
421
- if (vmballoon_check_status(b, status))
651
+ return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
652
+}
653
+
654
+/**
655
+ * vmballoon_alloc_page_list - allocates a list of pages.
656
+ *
657
+ * @b: pointer to the balloon.
658
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
659
+ * @req_n_pages: the number of requested pages.
660
+ *
661
+ * Tries to allocate @req_n_pages. Add them to the list of balloon pages in
662
+ * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
663
+ *
664
+ * Return: zero on success or error code otherwise.
665
+ */
666
+static int vmballoon_alloc_page_list(struct vmballoon *b,
667
+ struct vmballoon_ctl *ctl,
668
+ unsigned int req_n_pages)
669
+{
670
+ struct page *page;
671
+ unsigned int i;
672
+
673
+ for (i = 0; i < req_n_pages; i++) {
674
+ /*
675
+ * First check if we happen to have pages that were allocated
676
+ * before. This happens when 2MB page rejected during inflation
677
+ * by the hypervisor, and then split into 4KB pages.
678
+ */
679
+ if (!list_empty(&ctl->prealloc_pages)) {
680
+ page = list_first_entry(&ctl->prealloc_pages,
681
+ struct page, lru);
682
+ list_del(&page->lru);
683
+ } else {
684
+ if (ctl->page_size == VMW_BALLOON_2M_PAGE)
685
+ page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
686
+ __GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
687
+ else
688
+ page = balloon_page_alloc();
689
+
690
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
691
+ ctl->page_size);
692
+ }
693
+
694
+ if (page) {
695
+ /* Success. Add the page to the list and continue. */
696
+ list_add(&page->lru, &ctl->pages);
697
+ continue;
698
+ }
699
+
700
+ /* Allocation failed. Update statistics and stop. */
701
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
702
+ ctl->page_size);
703
+ break;
704
+ }
705
+
706
+ ctl->n_pages = i;
707
+
708
+ return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
709
+}
710
+
711
+/**
712
+ * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
713
+ *
714
+ * @b: pointer for %struct vmballoon.
715
+ * @page: pointer for the page whose result should be handled.
716
+ * @page_size: size of the page.
717
+ * @status: status of the operation as provided by the hypervisor.
718
+ */
719
+static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
720
+ enum vmballoon_page_size_type page_size,
721
+ unsigned long status)
722
+{
723
+ /* On success do nothing. The page is already on the balloon list. */
724
+ if (likely(status == VMW_BALLOON_SUCCESS))
422725 return 0;
423726
424
- pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
425
- STATS_INC(b->stats.lock_fail[false]);
727
+ pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
728
+ page_to_pfn(page), status,
729
+ vmballoon_page_size_names[page_size]);
730
+
731
+ /* Error occurred */
732
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
733
+ page_size);
734
+
426735 return -EIO;
427736 }
428737
429
-static int vmballoon_send_batched_lock(struct vmballoon *b,
430
- unsigned int num_pages, bool is_2m_pages, unsigned int *target)
738
+/**
739
+ * vmballoon_status_page - returns the status of (un)lock operation
740
+ *
741
+ * @b: pointer to the balloon.
742
+ * @idx: index for the page for which the operation is performed.
743
+ * @p: pointer to where the page struct is returned.
744
+ *
745
+ * Following a lock or unlock operation, returns the status of the operation for
746
+ * an individual page. Provides the page that the operation was performed on on
747
+ * the @page argument.
748
+ *
749
+ * Returns: The status of a lock or unlock operation for an individual page.
750
+ */
751
+static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
752
+ struct page **p)
431753 {
432
- unsigned long status;
433
- unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
754
+ if (static_branch_likely(&vmw_balloon_batching)) {
755
+ /* batching mode */
756
+ *p = pfn_to_page(b->batch_page[idx].pfn);
757
+ return b->batch_page[idx].status;
758
+ }
434759
435
- STATS_INC(b->stats.lock[is_2m_pages]);
760
+ /* non-batching mode */
761
+ *p = b->page;
436762
437
- if (is_2m_pages)
438
- status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
439
- *target);
763
+ /*
764
+ * If a failure occurs, the indication will be provided in the status
765
+ * of the entire operation, which is considered before the individual
766
+ * page status. So for non-batching mode, the indication is always of
767
+ * success.
768
+ */
769
+ return VMW_BALLOON_SUCCESS;
770
+}
771
+
772
+/**
773
+ * vmballoon_lock_op - notifies the host about inflated/deflated pages.
774
+ * @b: pointer to the balloon.
775
+ * @num_pages: number of inflated/deflated pages.
776
+ * @page_size: size of the page.
777
+ * @op: the type of operation (lock or unlock).
778
+ *
779
+ * Notify the host about page(s) that were ballooned (or removed from the
780
+ * balloon) so that host can use it without fear that guest will need it (or
781
+ * stop using them since the VM does). Host may reject some pages, we need to
782
+ * check the return value and maybe submit a different page. The pages that are
783
+ * inflated/deflated are pointed by @b->page.
784
+ *
785
+ * Return: result as provided by the hypervisor.
786
+ */
787
+static unsigned long vmballoon_lock_op(struct vmballoon *b,
788
+ unsigned int num_pages,
789
+ enum vmballoon_page_size_type page_size,
790
+ enum vmballoon_op op)
791
+{
792
+ unsigned long cmd, pfn;
793
+
794
+ lockdep_assert_held(&b->comm_lock);
795
+
796
+ if (static_branch_likely(&vmw_balloon_batching)) {
797
+ if (op == VMW_BALLOON_INFLATE)
798
+ cmd = page_size == VMW_BALLOON_2M_PAGE ?
799
+ VMW_BALLOON_CMD_BATCHED_2M_LOCK :
800
+ VMW_BALLOON_CMD_BATCHED_LOCK;
801
+ else
802
+ cmd = page_size == VMW_BALLOON_2M_PAGE ?
803
+ VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
804
+ VMW_BALLOON_CMD_BATCHED_UNLOCK;
805
+
806
+ pfn = PHYS_PFN(virt_to_phys(b->batch_page));
807
+ } else {
808
+ cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
809
+ VMW_BALLOON_CMD_UNLOCK;
810
+ pfn = page_to_pfn(b->page);
811
+
812
+ /* In non-batching mode, PFNs must fit in 32-bit */
813
+ if (unlikely(pfn != (u32)pfn))
814
+ return VMW_BALLOON_ERROR_PPN_INVALID;
815
+ }
816
+
817
+ return vmballoon_cmd(b, cmd, pfn, num_pages);
818
+}
819
+
820
+/**
821
+ * vmballoon_add_page - adds a page towards lock/unlock operation.
822
+ *
823
+ * @b: pointer to the balloon.
824
+ * @idx: index of the page to be ballooned in this batch.
825
+ * @p: pointer to the page that is about to be ballooned.
826
+ *
827
+ * Adds the page to be ballooned. Must be called while holding @comm_lock.
828
+ */
829
+static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
830
+ struct page *p)
831
+{
832
+ lockdep_assert_held(&b->comm_lock);
833
+
834
+ if (static_branch_likely(&vmw_balloon_batching))
835
+ b->batch_page[idx] = (struct vmballoon_batch_entry)
836
+ { .pfn = page_to_pfn(p) };
440837 else
441
- status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
442
- *target);
838
+ b->page = p;
839
+}
443840
444
- if (vmballoon_check_status(b, status))
841
+/**
842
+ * vmballoon_lock - lock or unlock a batch of pages.
843
+ *
844
+ * @b: pointer to the balloon.
845
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
846
+ *
847
+ * Notifies the host of about ballooned pages (after inflation or deflation,
848
+ * according to @ctl). If the host rejects the page put it on the
849
+ * @ctl refuse list. These refused page are then released when moving to the
850
+ * next size of pages.
851
+ *
852
+ * Note that we neither free any @page here nor put them back on the ballooned
853
+ * pages list. Instead we queue it for later processing. We do that for several
854
+ * reasons. First, we do not want to free the page under the lock. Second, it
855
+ * allows us to unify the handling of lock and unlock. In the inflate case, the
856
+ * caller will check if there are too many refused pages and release them.
857
+ * Although it is not identical to the past behavior, it should not affect
858
+ * performance.
859
+ */
860
+static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
861
+{
862
+ unsigned long batch_status;
863
+ struct page *page;
864
+ unsigned int i, num_pages;
865
+
866
+ num_pages = ctl->n_pages;
867
+ if (num_pages == 0)
445868 return 0;
446869
447
- pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
448
- STATS_INC(b->stats.lock_fail[is_2m_pages]);
449
- return 1;
450
-}
870
+ /* communication with the host is done under the communication lock */
871
+ spin_lock(&b->comm_lock);
451872
452
-/*
453
- * Notify the host that guest intends to release given page back into
454
- * the pool of available (to the guest) pages.
455
- */
456
-static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
457
- unsigned int *target)
458
-{
459
- unsigned long status, dummy = 0;
460
- u32 pfn32;
873
+ i = 0;
874
+ list_for_each_entry(page, &ctl->pages, lru)
875
+ vmballoon_add_page(b, i++, page);
461876
462
- pfn32 = (u32)pfn;
463
- if (pfn32 != pfn)
464
- return false;
877
+ batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
878
+ ctl->op);
465879
466
- STATS_INC(b->stats.unlock[false]);
880
+ /*
881
+ * Iterate over the pages in the provided list. Since we are changing
882
+ * @ctl->n_pages we are saving the original value in @num_pages and
883
+ * use this value to bound the loop.
884
+ */
885
+ for (i = 0; i < num_pages; i++) {
886
+ unsigned long status;
467887
468
- status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
469
- if (vmballoon_check_status(b, status))
470
- return true;
471
-
472
- pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
473
- STATS_INC(b->stats.unlock_fail[false]);
474
- return false;
475
-}
476
-
477
-static bool vmballoon_send_batched_unlock(struct vmballoon *b,
478
- unsigned int num_pages, bool is_2m_pages, unsigned int *target)
479
-{
480
- unsigned long status;
481
- unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
482
-
483
- STATS_INC(b->stats.unlock[is_2m_pages]);
484
-
485
- if (is_2m_pages)
486
- status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
487
- *target);
488
- else
489
- status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
490
- *target);
491
-
492
- if (vmballoon_check_status(b, status))
493
- return true;
494
-
495
- pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
496
- STATS_INC(b->stats.unlock_fail[is_2m_pages]);
497
- return false;
498
-}
499
-
500
-static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
501
-{
502
- if (is_2m_page)
503
- return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
504
-
505
- return alloc_page(flags);
506
-}
507
-
508
-static void vmballoon_free_page(struct page *page, bool is_2m_page)
509
-{
510
- if (is_2m_page)
511
- __free_pages(page, VMW_BALLOON_2M_SHIFT);
512
- else
513
- __free_page(page);
514
-}
515
-
516
-/*
517
- * Quickly release all pages allocated for the balloon. This function is
518
- * called when host decides to "reset" balloon for one reason or another.
519
- * Unlike normal "deflate" we do not (shall not) notify host of the pages
520
- * being released.
521
- */
522
-static void vmballoon_pop(struct vmballoon *b)
523
-{
524
- struct page *page, *next;
525
- unsigned is_2m_pages;
526
-
527
- for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
528
- is_2m_pages++) {
529
- struct vmballoon_page_size *page_size =
530
- &b->page_sizes[is_2m_pages];
531
- u16 size_per_page = vmballoon_page_size(is_2m_pages);
532
-
533
- list_for_each_entry_safe(page, next, &page_size->pages, lru) {
534
- list_del(&page->lru);
535
- vmballoon_free_page(page, is_2m_pages);
536
- STATS_INC(b->stats.free[is_2m_pages]);
537
- b->size -= size_per_page;
538
- cond_resched();
539
- }
540
- }
541
-
542
- /* Clearing the batch_page unconditionally has no adverse effect */
543
- free_page((unsigned long)b->batch_page);
544
- b->batch_page = NULL;
545
-}
546
-
547
-/*
548
- * Notify the host of a ballooned page. If host rejects the page put it on the
549
- * refuse list, those refused page are then released at the end of the
550
- * inflation cycle.
551
- */
552
-static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
553
- bool is_2m_pages, unsigned int *target)
554
-{
555
- int locked, hv_status;
556
- struct page *page = b->page;
557
- struct vmballoon_page_size *page_size = &b->page_sizes[false];
558
-
559
- /* is_2m_pages can never happen as 2m pages support implies batching */
560
-
561
- locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
562
- target);
563
- if (locked) {
564
- STATS_INC(b->stats.refused_alloc[false]);
565
-
566
- if (locked == -EIO &&
567
- (hv_status == VMW_BALLOON_ERROR_RESET ||
568
- hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
569
- vmballoon_free_page(page, false);
570
- return -EIO;
571
- }
888
+ status = vmballoon_status_page(b, i, &page);
572889
573890 /*
574
- * Place page on the list of non-balloonable pages
575
- * and retry allocation, unless we already accumulated
576
- * too many of them, in which case take a breather.
891
+ * Failure of the whole batch overrides a single operation
892
+ * results.
577893 */
578
- if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
579
- page_size->n_refused_pages++;
580
- list_add(&page->lru, &page_size->refused_pages);
581
- } else {
582
- vmballoon_free_page(page, false);
583
- }
584
- return locked;
894
+ if (batch_status != VMW_BALLOON_SUCCESS)
895
+ status = batch_status;
896
+
897
+ /* Continue if no error happened */
898
+ if (!vmballoon_handle_one_result(b, page, ctl->page_size,
899
+ status))
900
+ continue;
901
+
902
+ /*
903
+ * Error happened. Move the pages to the refused list and update
904
+ * the pages number.
905
+ */
906
+ list_move(&page->lru, &ctl->refused_pages);
907
+ ctl->n_pages--;
908
+ ctl->n_refused_pages++;
585909 }
586910
587
- /* track allocated page */
588
- list_add(&page->lru, &page_size->pages);
911
+ spin_unlock(&b->comm_lock);
589912
590
- /* update balloon size */
591
- b->size++;
592
-
593
- return 0;
913
+ return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
594914 }
595915
596
-static int vmballoon_lock_batched_page(struct vmballoon *b,
597
- unsigned int num_pages, bool is_2m_pages, unsigned int *target)
598
-{
599
- int locked, i;
600
- u16 size_per_page = vmballoon_page_size(is_2m_pages);
601
-
602
- locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
603
- target);
604
- if (locked > 0) {
605
- for (i = 0; i < num_pages; i++) {
606
- u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
607
- struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
608
-
609
- vmballoon_free_page(p, is_2m_pages);
610
- }
611
-
612
- return -EIO;
613
- }
614
-
615
- for (i = 0; i < num_pages; i++) {
616
- u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
617
- struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
618
- struct vmballoon_page_size *page_size =
619
- &b->page_sizes[is_2m_pages];
620
-
621
- locked = vmballoon_batch_get_status(b->batch_page, i);
622
-
623
- switch (locked) {
624
- case VMW_BALLOON_SUCCESS:
625
- list_add(&p->lru, &page_size->pages);
626
- b->size += size_per_page;
627
- break;
628
- case VMW_BALLOON_ERROR_PPN_PINNED:
629
- case VMW_BALLOON_ERROR_PPN_INVALID:
630
- if (page_size->n_refused_pages
631
- < VMW_BALLOON_MAX_REFUSED) {
632
- list_add(&p->lru, &page_size->refused_pages);
633
- page_size->n_refused_pages++;
634
- break;
635
- }
636
- /* Fallthrough */
637
- case VMW_BALLOON_ERROR_RESET:
638
- case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
639
- vmballoon_free_page(p, is_2m_pages);
640
- break;
641
- default:
642
- /* This should never happen */
643
- WARN_ON_ONCE(true);
644
- }
645
- }
646
-
647
- return 0;
648
-}
649
-
650
-/*
651
- * Release the page allocated for the balloon. Note that we first notify
652
- * the host so it can make sure the page will be available for the guest
653
- * to use, if needed.
916
+/**
917
+ * vmballoon_release_page_list() - Releases a page list
918
+ *
919
+ * @page_list: list of pages to release.
920
+ * @n_pages: pointer to the number of pages.
921
+ * @page_size: whether the pages in the list are 2MB (or else 4KB).
922
+ *
923
+ * Releases the list of pages and zeros the number of pages.
654924 */
655
-static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
656
- bool is_2m_pages, unsigned int *target)
925
+static void vmballoon_release_page_list(struct list_head *page_list,
926
+ int *n_pages,
927
+ enum vmballoon_page_size_type page_size)
657928 {
658
- struct page *page = b->page;
659
- struct vmballoon_page_size *page_size = &b->page_sizes[false];
929
+ struct page *page, *tmp;
660930
661
- /* is_2m_pages can never happen as 2m pages support implies batching */
662
-
663
- if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
664
- list_add(&page->lru, &page_size->pages);
665
- return -EIO;
931
+ list_for_each_entry_safe(page, tmp, page_list, lru) {
932
+ list_del(&page->lru);
933
+ __free_pages(page, vmballoon_page_order(page_size));
666934 }
667935
668
- /* deallocate page */
669
- vmballoon_free_page(page, false);
670
- STATS_INC(b->stats.free[false]);
671
-
672
- /* update balloon size */
673
- b->size--;
674
-
675
- return 0;
936
+ if (n_pages)
937
+ *n_pages = 0;
676938 }
677939
678
-static int vmballoon_unlock_batched_page(struct vmballoon *b,
679
- unsigned int num_pages, bool is_2m_pages,
680
- unsigned int *target)
681
-{
682
- int locked, i, ret = 0;
683
- bool hv_success;
684
- u16 size_per_page = vmballoon_page_size(is_2m_pages);
685
-
686
- hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
687
- target);
688
- if (!hv_success)
689
- ret = -EIO;
690
-
691
- for (i = 0; i < num_pages; i++) {
692
- u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
693
- struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
694
- struct vmballoon_page_size *page_size =
695
- &b->page_sizes[is_2m_pages];
696
-
697
- locked = vmballoon_batch_get_status(b->batch_page, i);
698
- if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
699
- /*
700
- * That page wasn't successfully unlocked by the
701
- * hypervisor, re-add it to the list of pages owned by
702
- * the balloon driver.
703
- */
704
- list_add(&p->lru, &page_size->pages);
705
- } else {
706
- /* deallocate page */
707
- vmballoon_free_page(p, is_2m_pages);
708
- STATS_INC(b->stats.free[is_2m_pages]);
709
-
710
- /* update balloon size */
711
- b->size -= size_per_page;
712
- }
713
- }
714
-
715
- return ret;
716
-}
717940
718941 /*
719942 * Release pages that were allocated while attempting to inflate the
720943 * balloon but were refused by the host for one reason or another.
721944 */
722945 static void vmballoon_release_refused_pages(struct vmballoon *b,
723
- bool is_2m_pages)
946
+ struct vmballoon_ctl *ctl)
724947 {
725
- struct page *page, *next;
726
- struct vmballoon_page_size *page_size =
727
- &b->page_sizes[is_2m_pages];
948
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
949
+ ctl->page_size);
728950
729
- list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
730
- list_del(&page->lru);
731
- vmballoon_free_page(page, is_2m_pages);
732
- STATS_INC(b->stats.refused_free[is_2m_pages]);
951
+ vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
952
+ ctl->page_size);
953
+}
954
+
955
+/**
956
+ * vmballoon_change - retrieve the required balloon change
957
+ *
958
+ * @b: pointer for the balloon.
959
+ *
960
+ * Return: the required change for the balloon size. A positive number
961
+ * indicates inflation, a negative number indicates a deflation.
962
+ */
963
+static int64_t vmballoon_change(struct vmballoon *b)
964
+{
965
+ int64_t size, target;
966
+
967
+ size = atomic64_read(&b->size);
968
+ target = READ_ONCE(b->target);
969
+
970
+ /*
971
+ * We must cast first because of int sizes
972
+ * Otherwise we might get huge positives instead of negatives
973
+ */
974
+
975
+ if (b->reset_required)
976
+ return 0;
977
+
978
+ /* consider a 2MB slack on deflate, unless the balloon is emptied */
979
+ if (target < size && target != 0 &&
980
+ size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
981
+ return 0;
982
+
983
+ /* If an out-of-memory recently occurred, inflation is disallowed. */
984
+ if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
985
+ return 0;
986
+
987
+ return target - size;
988
+}
989
+
990
+/**
991
+ * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
992
+ *
993
+ * @b: pointer to balloon.
994
+ * @pages: list of pages to enqueue.
995
+ * @n_pages: pointer to number of pages in list. The value is zeroed.
996
+ * @page_size: whether the pages are 2MB or 4KB pages.
997
+ *
998
+ * Enqueues the provides list of pages in the ballooned page list, clears the
999
+ * list and zeroes the number of pages that was provided.
1000
+ */
1001
+static void vmballoon_enqueue_page_list(struct vmballoon *b,
1002
+ struct list_head *pages,
1003
+ unsigned int *n_pages,
1004
+ enum vmballoon_page_size_type page_size)
1005
+{
1006
+ unsigned long flags;
1007
+ struct page *page;
1008
+
1009
+ if (page_size == VMW_BALLOON_4K_PAGE) {
1010
+ balloon_page_list_enqueue(&b->b_dev_info, pages);
1011
+ } else {
1012
+ /*
1013
+ * Keep the huge pages in a local list which is not available
1014
+ * for the balloon compaction mechanism.
1015
+ */
1016
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
1017
+
1018
+ list_for_each_entry(page, pages, lru) {
1019
+ vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
1020
+ }
1021
+
1022
+ list_splice_init(pages, &b->huge_pages);
1023
+ __count_vm_events(BALLOON_INFLATE, *n_pages *
1024
+ vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
1025
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
7331026 }
7341027
735
- page_size->n_refused_pages = 0;
1028
+ *n_pages = 0;
7361029 }
7371030
738
-static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
1031
+/**
1032
+ * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
1033
+ *
1034
+ * @b: pointer to balloon.
1035
+ * @pages: list of pages to enqueue.
1036
+ * @n_pages: pointer to number of pages in list. The value is zeroed.
1037
+ * @page_size: whether the pages are 2MB or 4KB pages.
1038
+ * @n_req_pages: the number of requested pages.
1039
+ *
1040
+ * Dequeues the number of requested pages from the balloon for deflation. The
1041
+ * number of dequeued pages may be lower, if not enough pages in the requested
1042
+ * size are available.
1043
+ */
1044
+static void vmballoon_dequeue_page_list(struct vmballoon *b,
1045
+ struct list_head *pages,
1046
+ unsigned int *n_pages,
1047
+ enum vmballoon_page_size_type page_size,
1048
+ unsigned int n_req_pages)
7391049 {
740
- b->page = p;
1050
+ struct page *page, *tmp;
1051
+ unsigned int i = 0;
1052
+ unsigned long flags;
1053
+
1054
+ /* In the case of 4k pages, use the compaction infrastructure */
1055
+ if (page_size == VMW_BALLOON_4K_PAGE) {
1056
+ *n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
1057
+ n_req_pages);
1058
+ return;
1059
+ }
1060
+
1061
+ /* 2MB pages */
1062
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
1063
+ list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
1064
+ vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);
1065
+
1066
+ list_move(&page->lru, pages);
1067
+ if (++i == n_req_pages)
1068
+ break;
1069
+ }
1070
+
1071
+ __count_vm_events(BALLOON_DEFLATE,
1072
+ i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
1073
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
1074
+ *n_pages = i;
7411075 }
7421076
743
-static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
744
- struct page *p)
1077
+/**
1078
+ * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
1079
+ *
1080
+ * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
1081
+ * due to one or few 4KB pages. These 2MB pages may keep being allocated and
1082
+ * then being refused. To prevent this case, this function splits the refused
1083
+ * pages into 4KB pages and adds them into @prealloc_pages list.
1084
+ *
1085
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
1086
+ */
1087
+static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
7451088 {
746
- vmballoon_batch_set_pa(b->batch_page, idx,
747
- (u64)page_to_pfn(p) << PAGE_SHIFT);
1089
+ struct page *page, *tmp;
1090
+ unsigned int i, order;
1091
+
1092
+ order = vmballoon_page_order(ctl->page_size);
1093
+
1094
+ list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
1095
+ list_del(&page->lru);
1096
+ split_page(page, order);
1097
+ for (i = 0; i < (1 << order); i++)
1098
+ list_add(&page[i].lru, &ctl->prealloc_pages);
1099
+ }
1100
+ ctl->n_refused_pages = 0;
7481101 }
7491102
750
-/*
751
- * Inflate the balloon towards its target size. Note that we try to limit
752
- * the rate of allocation to make sure we are not choking the rest of the
753
- * system.
1103
+/**
1104
+ * vmballoon_inflate() - Inflate the balloon towards its target size.
1105
+ *
1106
+ * @b: pointer to the balloon.
7541107 */
7551108 static void vmballoon_inflate(struct vmballoon *b)
7561109 {
757
- unsigned int num_pages = 0;
758
- int error = 0;
759
- gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
760
- bool is_2m_pages;
1110
+ int64_t to_inflate_frames;
1111
+ struct vmballoon_ctl ctl = {
1112
+ .pages = LIST_HEAD_INIT(ctl.pages),
1113
+ .refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
1114
+ .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
1115
+ .page_size = b->max_page_size,
1116
+ .op = VMW_BALLOON_INFLATE
1117
+ };
7611118
762
- pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
1119
+ while ((to_inflate_frames = vmballoon_change(b)) > 0) {
1120
+ unsigned int to_inflate_pages, page_in_frames;
1121
+ int alloc_error, lock_error = 0;
7631122
764
- /*
765
- * First try NOSLEEP page allocations to inflate balloon.
766
- *
767
- * If we do not throttle nosleep allocations, we can drain all
768
- * free pages in the guest quickly (if the balloon target is high).
769
- * As a side-effect, draining free pages helps to inform (force)
770
- * the guest to start swapping if balloon target is not met yet,
771
- * which is a desired behavior. However, balloon driver can consume
772
- * all available CPU cycles if too many pages are allocated in a
773
- * second. Therefore, we throttle nosleep allocations even when
774
- * the guest is not under memory pressure. OTOH, if we have already
775
- * predicted that the guest is under memory pressure, then we
776
- * slowdown page allocations considerably.
777
- */
1123
+ VM_BUG_ON(!list_empty(&ctl.pages));
1124
+ VM_BUG_ON(ctl.n_pages != 0);
7781125
779
- /*
780
- * Start with no sleep allocation rate which may be higher
781
- * than sleeping allocation rate.
782
- */
783
- is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
1126
+ page_in_frames = vmballoon_page_in_frames(ctl.page_size);
7841127
785
- pr_debug("%s - goal: %d", __func__, b->target - b->size);
1128
+ to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
1129
+ DIV_ROUND_UP_ULL(to_inflate_frames,
1130
+ page_in_frames));
7861131
787
- while (!b->reset_required &&
788
- b->size + num_pages * vmballoon_page_size(is_2m_pages)
789
- < b->target) {
790
- struct page *page;
1132
+ /* Start by allocating */
1133
+ alloc_error = vmballoon_alloc_page_list(b, &ctl,
1134
+ to_inflate_pages);
7911135
792
- if (flags == VMW_PAGE_ALLOC_NOSLEEP)
793
- STATS_INC(b->stats.alloc[is_2m_pages]);
794
- else
795
- STATS_INC(b->stats.sleep_alloc);
1136
+ /* Actually lock the pages by telling the hypervisor */
1137
+ lock_error = vmballoon_lock(b, &ctl);
7961138
797
- page = vmballoon_alloc_page(flags, is_2m_pages);
798
- if (!page) {
799
- STATS_INC(b->stats.alloc_fail[is_2m_pages]);
1139
+ /*
1140
+ * If an error indicates that something serious went wrong,
1141
+ * stop the inflation.
1142
+ */
1143
+ if (lock_error)
1144
+ break;
8001145
801
- if (is_2m_pages) {
802
- b->ops->lock(b, num_pages, true, &b->target);
1146
+ /* Update the balloon size */
1147
+ atomic64_add(ctl.n_pages * page_in_frames, &b->size);
8031148
804
- /*
805
- * ignore errors from locking as we now switch
806
- * to 4k pages and we might get different
807
- * errors.
808
- */
1149
+ vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
1150
+ ctl.page_size);
8091151
810
- num_pages = 0;
811
- is_2m_pages = false;
812
- continue;
813
- }
814
-
815
- if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
816
- /*
817
- * CANSLEEP page allocation failed, so guest
818
- * is under severe memory pressure. We just log
819
- * the event, but do not stop the inflation
820
- * due to its negative impact on performance.
821
- */
822
- STATS_INC(b->stats.sleep_alloc_fail);
1152
+ /*
1153
+ * If allocation failed or the number of refused pages exceeds
1154
+ * the maximum allowed, move to the next page size.
1155
+ */
1156
+ if (alloc_error ||
1157
+ ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
1158
+ if (ctl.page_size == VMW_BALLOON_4K_PAGE)
8231159 break;
824
- }
8251160
8261161 /*
827
- * NOSLEEP page allocation failed, so the guest is
828
- * under memory pressure. Slowing down page alloctions
829
- * seems to be reasonable, but doing so might actually
830
- * cause the hypervisor to throttle us down, resulting
831
- * in degraded performance. We will count on the
832
- * scheduler and standard memory management mechanisms
833
- * for now.
1162
+			 * Split the refused pages into 4KB pages. This will
1163
+			 * also empty the refused pages list.
8341164 */
835
- flags = VMW_PAGE_ALLOC_CANSLEEP;
836
- continue;
837
- }
838
-
839
- b->ops->add_page(b, num_pages++, page);
840
- if (num_pages == b->batch_max_pages) {
841
- error = b->ops->lock(b, num_pages, is_2m_pages,
842
- &b->target);
843
- num_pages = 0;
844
- if (error)
845
- break;
1165
+ vmballoon_split_refused_pages(&ctl);
1166
+ ctl.page_size--;
8461167 }
8471168
8481169 cond_resched();
8491170 }
8501171
851
- if (num_pages > 0)
852
- b->ops->lock(b, num_pages, is_2m_pages, &b->target);
1172
+ /*
1173
+ * Release pages that were allocated while attempting to inflate the
1174
+ * balloon but were refused by the host for one reason or another,
1175
+ * and update the statistics.
1176
+ */
1177
+ if (ctl.n_refused_pages != 0)
1178
+ vmballoon_release_refused_pages(b, &ctl);
8531179
854
- vmballoon_release_refused_pages(b, true);
855
- vmballoon_release_refused_pages(b, false);
1180
+ vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size);
8561181 }
8571182
858
-/*
1183
+/**
1184
+ * vmballoon_deflate() - Decrease the size of the balloon.
1185
+ *
1186
+ * @b: pointer to the balloon
1187
+ * @n_frames: the number of frames to deflate. If zero, automatically
1188
+ * calculated according to the target size.
1189
+ * @coordinated: whether to coordinate with the host
1190
+ *
8591191 * Decrease the size of the balloon allowing guest to use more memory.
1192
+ *
1193
+ * Return: The number of deflated frames (i.e., basic page size units)
8601194 */
861
-static void vmballoon_deflate(struct vmballoon *b)
1195
+static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
1196
+ bool coordinated)
8621197 {
863
- unsigned is_2m_pages;
864
-
865
- pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
1198
+ unsigned long deflated_frames = 0;
1199
+ unsigned long tried_frames = 0;
1200
+ struct vmballoon_ctl ctl = {
1201
+ .pages = LIST_HEAD_INIT(ctl.pages),
1202
+ .refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
1203
+ .page_size = VMW_BALLOON_4K_PAGE,
1204
+ .op = VMW_BALLOON_DEFLATE
1205
+ };
8661206
8671207 /* free pages to reach target */
868
- for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
869
- is_2m_pages++) {
870
- struct page *page, *next;
871
- unsigned int num_pages = 0;
872
- struct vmballoon_page_size *page_size =
873
- &b->page_sizes[is_2m_pages];
1208
+ while (true) {
1209
+ unsigned int to_deflate_pages, n_unlocked_frames;
1210
+ unsigned int page_in_frames;
1211
+ int64_t to_deflate_frames;
1212
+ bool deflated_all;
8741213
875
- list_for_each_entry_safe(page, next, &page_size->pages, lru) {
876
- if (b->reset_required ||
877
- (b->target > 0 &&
878
- b->size - num_pages
879
- * vmballoon_page_size(is_2m_pages)
880
- < b->target + vmballoon_page_size(true)))
1214
+ page_in_frames = vmballoon_page_in_frames(ctl.page_size);
1215
+
1216
+ VM_BUG_ON(!list_empty(&ctl.pages));
1217
+ VM_BUG_ON(ctl.n_pages);
1218
+ VM_BUG_ON(!list_empty(&ctl.refused_pages));
1219
+ VM_BUG_ON(ctl.n_refused_pages);
1220
+
1221
+ /*
1222
+		 * If a specific number of frames was requested, we try to
1223
+		 * deflate that many frames. Otherwise, deflation is
1224
+		 * performed according to the target and balloon size.
1225
+ */
1226
+ to_deflate_frames = n_frames ? n_frames - tried_frames :
1227
+ -vmballoon_change(b);
1228
+
1229
+ /* break if no work to do */
1230
+ if (to_deflate_frames <= 0)
1231
+ break;
1232
+
1233
+ /*
1234
+		 * Calculate the number of frames based on the current page
1235
+		 * size, but limit the deflated frames to a single chunk.
1236
+ */
1237
+ to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
1238
+ DIV_ROUND_UP_ULL(to_deflate_frames,
1239
+ page_in_frames));
1240
+
1241
+		/* First take the pages from the balloon's page list. */
1242
+ vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
1243
+ ctl.page_size, to_deflate_pages);
1244
+
1245
+ /*
1246
+ * Before pages are moving to the refused list, count their
1247
+ * frames as frames that we tried to deflate.
1248
+ */
1249
+ tried_frames += ctl.n_pages * page_in_frames;
1250
+
1251
+ /*
1252
+ * Unlock the pages by communicating with the hypervisor if the
1253
+ * communication is coordinated (i.e., not pop). We ignore the
1254
+ * return code. Instead we check if all the pages we manage to
1255
+ * unlock all the pages. If we failed, we will move to the next
1256
+ * page size, and would eventually try again later.
1257
+ */
1258
+ if (coordinated)
1259
+ vmballoon_lock(b, &ctl);
1260
+
1261
+ /*
1262
+ * Check if we deflated enough. We will move to the next page
1263
+ * size if we did not manage to do so. This calculation takes
1264
+ * place now, as once the pages are released, the number of
1265
+ * pages is zeroed.
1266
+ */
1267
+ deflated_all = (ctl.n_pages == to_deflate_pages);
1268
+
1269
+ /* Update local and global counters */
1270
+ n_unlocked_frames = ctl.n_pages * page_in_frames;
1271
+ atomic64_sub(n_unlocked_frames, &b->size);
1272
+ deflated_frames += n_unlocked_frames;
1273
+
1274
+ vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
1275
+ ctl.page_size, ctl.n_pages);
1276
+
1277
+ /* free the ballooned pages */
1278
+ vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
1279
+ ctl.page_size);
1280
+
1281
+ /* Return the refused pages to the ballooned list. */
1282
+ vmballoon_enqueue_page_list(b, &ctl.refused_pages,
1283
+ &ctl.n_refused_pages,
1284
+ ctl.page_size);
1285
+
1286
+		/* If we failed to unlock all the pages, move to the next size. */
1287
+ if (!deflated_all) {
1288
+ if (ctl.page_size == b->max_page_size)
8811289 break;
882
-
883
- list_del(&page->lru);
884
- b->ops->add_page(b, num_pages++, page);
885
-
886
- if (num_pages == b->batch_max_pages) {
887
- int error;
888
-
889
- error = b->ops->unlock(b, num_pages,
890
- is_2m_pages, &b->target);
891
- num_pages = 0;
892
- if (error)
893
- return;
894
- }
895
-
896
- cond_resched();
1290
+ ctl.page_size++;
8971291 }
8981292
899
- if (num_pages > 0)
900
- b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
1293
+ cond_resched();
9011294 }
1295
+
1296
+ return deflated_frames;
9021297 }
9031298
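The three modes the prototype above allows are easiest to see from its call sites, all of which appear elsewhere in this patch; a condensed sketch (not actual driver code):

static void example_deflate_modes(struct vmballoon *b,
				  struct shrink_control *sc)
{
	/* Periodic worker: n_frames == 0, deflate toward the target. */
	vmballoon_deflate(b, 0, true);

	/* Shrinker: deflate exactly the number of frames asked for. */
	vmballoon_deflate(b, sc->nr_to_scan, true);

	/* Reset "pop": uncoordinated, no unlock commands to the host. */
	vmballoon_deflate(b, atomic64_read(&b->size), false);
}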
904
-static const struct vmballoon_ops vmballoon_basic_ops = {
905
- .add_page = vmballoon_add_page,
906
- .lock = vmballoon_lock_page,
907
- .unlock = vmballoon_unlock_page
908
-};
1299
+/**
1300
+ * vmballoon_deinit_batching - disables batching mode.
1301
+ *
1302
+ * @b: pointer to &struct vmballoon.
1303
+ *
1304
+ * Disables batching by deallocating the page used for communication with the
1305
+ * hypervisor and disabling the static key to indicate that batching is off.
1306
+ */
1307
+static void vmballoon_deinit_batching(struct vmballoon *b)
1308
+{
1309
+ free_page((unsigned long)b->batch_page);
1310
+ b->batch_page = NULL;
1311
+ static_branch_disable(&vmw_balloon_batching);
1312
+ b->batch_max_pages = 1;
1313
+}
9091314
910
-static const struct vmballoon_ops vmballoon_batched_ops = {
911
- .add_page = vmballoon_add_batched_page,
912
- .lock = vmballoon_lock_batched_page,
913
- .unlock = vmballoon_unlock_batched_page
914
-};
915
-
916
-static bool vmballoon_init_batching(struct vmballoon *b)
1315
+/**
1316
+ * vmballoon_init_batching - enable batching mode.
1317
+ *
1318
+ * @b: pointer to &struct vmballoon.
1319
+ *
1320
+ * Enables batching, by allocating a page for communication with the hypervisor
1321
+ * and enabling the static_key to use batching.
1322
+ *
1323
+ * Return: zero on success or an appropriate error-code.
1324
+ */
1325
+static int vmballoon_init_batching(struct vmballoon *b)
9171326 {
9181327 struct page *page;
9191328
9201329 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
9211330 if (!page)
922
- return false;
1331
+ return -ENOMEM;
9231332
9241333 b->batch_page = page_address(page);
925
- return true;
1334
+ b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);
1335
+
1336
+ static_branch_enable(&vmw_balloon_batching);
1337
+
1338
+ return 0;
9261339 }
9271340
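Both helpers toggle a static key rather than testing a flag on every hypervisor call, so the hot path costs a patched jump instead of a load-and-branch. The pattern in isolation (illustrative names):

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(example_batching);

/* Slow path: flip the key when the communication mode changes. */
static void example_set_batching(bool on)
{
	if (on)
		static_branch_enable(&example_batching);
	else
		static_branch_disable(&example_batching);
}

/* Hot path: compiled as a patched jump, not a load-and-test. */
static bool example_use_batching(void)
{
	return static_branch_likely(&example_batching);
}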
9281341 /*
....@@ -932,7 +1345,7 @@
9321345 {
9331346 struct vmballoon *b = client_data;
9341347
935
- STATS_INC(b->stats.doorbell);
1348
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);
9361349
9371350 mod_delayed_work(system_freezable_wq, &b->dwork, 0);
9381351 }
....@@ -942,11 +1355,8 @@
9421355 */
9431356 static void vmballoon_vmci_cleanup(struct vmballoon *b)
9441357 {
945
- int error;
946
-
947
- VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
948
- VMCI_INVALID_ID, error);
949
- STATS_INC(b->stats.doorbell_unset);
1358
+ vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
1359
+ VMCI_INVALID_ID, VMCI_INVALID_ID);
9501360
9511361 if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
9521362 vmci_doorbell_destroy(b->vmci_doorbell);
....@@ -954,12 +1364,19 @@
9541364 }
9551365 }
9561366
957
-/*
958
- * Initialize vmci doorbell, to get notified as soon as balloon changes
1367
+/**
1368
+ * vmballoon_vmci_init - Initialize vmci doorbell.
1369
+ *
1370
+ * @b: pointer to the balloon.
1371
+ *
1372
+ * Return: zero on success or when wakeup command not supported. Error-code
1373
+ * otherwise.
1374
+ *
1375
+ * Initialize vmci doorbell, to get notified as soon as balloon changes.
9591376 */
9601377 static int vmballoon_vmci_init(struct vmballoon *b)
9611378 {
962
- unsigned long error, dummy;
1379
+ unsigned long error;
9631380
9641381 if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
9651382 return 0;
....@@ -971,10 +1388,9 @@
9711388 if (error != VMCI_SUCCESS)
9721389 goto fail;
9731390
974
- error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context,
975
- b->vmci_doorbell.resource, dummy);
976
-
977
- STATS_INC(b->stats.doorbell_set);
1391
+ error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
1392
+ b->vmci_doorbell.context,
1393
+ b->vmci_doorbell.resource, NULL);
9781394
9791395 if (error != VMW_BALLOON_SUCCESS)
9801396 goto fail;
....@@ -983,6 +1399,23 @@
9831399 fail:
9841400 vmballoon_vmci_cleanup(b);
9851401 return -EIO;
1402
+}
1403
+
1404
+/**
1405
+ * vmballoon_pop - Quickly release all pages allocated for the balloon.
1406
+ *
1407
+ * @b: pointer to the balloon.
1408
+ *
1409
+ * This function is called when the host decides to "reset" the balloon for one
1410
+ * reason or another. Unlike a normal "deflate" we do not (and shall not)
1411
+ * notify the host of the pages being released.
1412
+ */
1413
+static void vmballoon_pop(struct vmballoon *b)
1414
+{
1415
+ unsigned long size;
1416
+
1417
+ while ((size = atomic64_read(&b->size)))
1418
+ vmballoon_deflate(b, size, false);
9861419 }
9871420
9881421 /*
....@@ -994,18 +1427,18 @@
9941427 {
9951428 int error;
9961429
1430
+ down_write(&b->conf_sem);
1431
+
9971432 vmballoon_vmci_cleanup(b);
9981433
9991434 /* free all pages, skipping monitor unlock */
10001435 vmballoon_pop(b);
10011436
1002
- if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
1003
- return;
1437
+ if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
1438
+ goto unlock;
10041439
10051440 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
1006
- b->ops = &vmballoon_batched_ops;
1007
- b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
1008
- if (!vmballoon_init_batching(b)) {
1441
+ if (vmballoon_init_batching(b)) {
10091442 /*
10101443 * We failed to initialize batching, inform the monitor
10111444 * about it by sending a null capability.
....@@ -1013,48 +1446,66 @@
10131446 * The guest will retry in one second.
10141447 */
10151448 vmballoon_send_start(b, 0);
1016
- return;
1449
+ goto unlock;
10171450 }
10181451 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
1019
- b->ops = &vmballoon_basic_ops;
1020
- b->batch_max_pages = 1;
1452
+ vmballoon_deinit_batching(b);
10211453 }
10221454
1455
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
10231456 b->reset_required = false;
10241457
10251458 error = vmballoon_vmci_init(b);
10261459 if (error)
10271460 pr_err("failed to initialize vmci doorbell\n");
10281461
1029
- if (!vmballoon_send_guest_id(b))
1462
+ if (vmballoon_send_guest_id(b))
10301463 pr_err("failed to send guest ID to the host\n");
1464
+
1465
+unlock:
1466
+ up_write(&b->conf_sem);
10311467 }
10321468
1033
-/*
1034
- * Balloon work function: reset protocol, if needed, get the new size and
1035
- * adjust balloon as needed. Repeat in 1 sec.
1469
+/**
1470
+ * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
1471
+ *
1472
+ * @work: pointer to the &work_struct which is provided by the workqueue.
1473
+ *
1474
+ * Resets the protocol if needed, gets the new size and adjusts the balloon
1475
+ * as needed. Repeats every second.
10361476 */
10371477 static void vmballoon_work(struct work_struct *work)
10381478 {
10391479 struct delayed_work *dwork = to_delayed_work(work);
10401480 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
1041
- unsigned int target;
1042
-
1043
- STATS_INC(b->stats.timer);
1481
+ int64_t change = 0;
10441482
10451483 if (b->reset_required)
10461484 vmballoon_reset(b);
10471485
1048
- if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
1049
- /* update target, adjust size */
1050
- b->target = target;
1486
+ down_read(&b->conf_sem);
10511487
1052
- if (b->size < target)
1488
+ /*
1489
+ * Update the stats while holding the semaphore to ensure that
1490
+ * @stats_enabled is consistent with whether the stats are actually
1491
+	 * enabled.
1492
+ */
1493
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);
1494
+
1495
+ if (!vmballoon_send_get_target(b))
1496
+ change = vmballoon_change(b);
1497
+
1498
+ if (change != 0) {
1499
+ pr_debug("%s - size: %llu, target %lu\n", __func__,
1500
+ atomic64_read(&b->size), READ_ONCE(b->target));
1501
+
1502
+ if (change > 0)
10531503 vmballoon_inflate(b);
1054
- else if (target == 0 ||
1055
- b->size > target + vmballoon_page_size(true))
1056
- vmballoon_deflate(b);
1504
+ else /* (change < 0) */
1505
+ vmballoon_deflate(b, 0, true);
10571506 }
1507
+
1508
+ up_read(&b->conf_sem);
10581509
10591510 /*
10601511 * We are using a freezable workqueue so that balloon operations are
....@@ -1062,6 +1513,91 @@
10621513 */
10631514 queue_delayed_work(system_freezable_wq,
10641515 dwork, round_jiffies_relative(HZ));
1516
+
1517
+}
1518
+
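The worker is a self-rearming delayed work item on the freezable workqueue; stripped of the balloon logic, the pattern looks like this (hypothetical names):

#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

static void example_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(example_dwork, example_work_fn);

static void example_work_fn(struct work_struct *work)
{
	/* ... periodic adjustment would go here ... */

	/* Re-arm for ~1s out. The freezable workqueue keeps the work
	 * quiescent across suspend; round_jiffies_relative() batches
	 * wakeups with other timers to save power.
	 */
	queue_delayed_work(system_freezable_wq, &example_dwork,
			   round_jiffies_relative(HZ));
}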
1519
+/**
1520
+ * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
1521
+ * @shrinker: pointer to the balloon shrinker.
1522
+ * @sc: page reclaim information.
1523
+ *
1524
+ * Returns: number of pages that were freed during deflation.
1525
+ */
1526
+static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
1527
+ struct shrink_control *sc)
1528
+{
1529
+ struct vmballoon *b = &balloon;
1530
+ unsigned long deflated_frames;
1531
+
1532
+ pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size));
1533
+
1534
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK);
1535
+
1536
+ /*
1537
+ * If the lock is also contended for read, we cannot easily reclaim and
1538
+ * we bail out.
1539
+ */
1540
+ if (!down_read_trylock(&b->conf_sem))
1541
+ return 0;
1542
+
1543
+ deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true);
1544
+
1545
+ vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE,
1546
+ deflated_frames);
1547
+
1548
+ /*
1549
+ * Delay future inflation for some time to mitigate the situations in
1550
+ * which balloon continuously grows and shrinks. Use WRITE_ONCE() since
1551
+ * the access is asynchronous.
1552
+ */
1553
+ WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);
1554
+
1555
+ up_read(&b->conf_sem);
1556
+
1557
+ return deflated_frames;
1558
+}
1559
+
1560
+/**
1561
+ * vmballoon_shrinker_count() - return the number of ballooned pages.
1562
+ * @shrinker: pointer to the balloon shrinker.
1563
+ * @sc: page reclaim information.
1564
+ *
1565
+ * Returns: number of 4k pages that are allocated for the balloon and can
1566
+ * therefore be reclaimed under pressure.
1567
+ */
1568
+static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
1569
+ struct shrink_control *sc)
1570
+{
1571
+ struct vmballoon *b = &balloon;
1572
+
1573
+ return atomic64_read(&b->size);
1574
+}
1575
+
1576
+static void vmballoon_unregister_shrinker(struct vmballoon *b)
1577
+{
1578
+ if (b->shrinker_registered)
1579
+ unregister_shrinker(&b->shrinker);
1580
+ b->shrinker_registered = false;
1581
+}
1582
+
1583
+static int vmballoon_register_shrinker(struct vmballoon *b)
1584
+{
1585
+ int r;
1586
+
1587
+ /* Do nothing if the shrinker is not enabled */
1588
+ if (!vmwballoon_shrinker_enable)
1589
+ return 0;
1590
+
1591
+ b->shrinker.scan_objects = vmballoon_shrinker_scan;
1592
+ b->shrinker.count_objects = vmballoon_shrinker_count;
1593
+ b->shrinker.seeks = DEFAULT_SEEKS;
1594
+
1595
+ r = register_shrinker(&b->shrinker);
1596
+
1597
+ if (r == 0)
1598
+ b->shrinker_registered = true;
1599
+
1600
+ return r;
10651601 }
10661602
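For reference, the count/scan contract these callbacks implement, reduced to a standalone skeleton (illustrative names; register_shrinker() is shown with the single-argument signature used above):

#include <linux/shrinker.h>

static unsigned long example_count(struct shrinker *s,
				   struct shrink_control *sc)
{
	/* Number of objects that could be freed; 0 means "skip scan". */
	return 0;
}

static unsigned long example_scan(struct shrinker *s,
				  struct shrink_control *sc)
{
	/* Free up to sc->nr_to_scan objects and return how many were
	 * freed, or SHRINK_STOP if no progress can be made right now.
	 */
	return SHRINK_STOP;
}

static struct shrinker example_shrinker = {
	.count_objects	= example_count,
	.scan_objects	= example_scan,
	.seeks		= DEFAULT_SEEKS,
};

/* register_shrinker(&example_shrinker) on init,
 * unregister_shrinker(&example_shrinker) on teardown.
 */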
10671603 /*
....@@ -1069,106 +1605,126 @@
10691605 */
10701606 #ifdef CONFIG_DEBUG_FS
10711607
1608
+static const char * const vmballoon_stat_page_names[] = {
1609
+ [VMW_BALLOON_PAGE_STAT_ALLOC] = "alloc",
1610
+ [VMW_BALLOON_PAGE_STAT_ALLOC_FAIL] = "allocFail",
1611
+ [VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC] = "errAlloc",
1612
+ [VMW_BALLOON_PAGE_STAT_REFUSED_FREE] = "errFree",
1613
+ [VMW_BALLOON_PAGE_STAT_FREE] = "free"
1614
+};
1615
+
1616
+static const char * const vmballoon_stat_names[] = {
1617
+ [VMW_BALLOON_STAT_TIMER] = "timer",
1618
+ [VMW_BALLOON_STAT_DOORBELL] = "doorbell",
1619
+ [VMW_BALLOON_STAT_RESET] = "reset",
1620
+ [VMW_BALLOON_STAT_SHRINK] = "shrink",
1621
+ [VMW_BALLOON_STAT_SHRINK_FREE] = "shrinkFree"
1622
+};
1623
+
1624
+static int vmballoon_enable_stats(struct vmballoon *b)
1625
+{
1626
+ int r = 0;
1627
+
1628
+ down_write(&b->conf_sem);
1629
+
1630
+ /* did we somehow race with another reader which enabled stats? */
1631
+ if (b->stats)
1632
+ goto out;
1633
+
1634
+ b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);
1635
+
1636
+ if (!b->stats) {
1637
+ /* allocation failed */
1638
+ r = -ENOMEM;
1639
+ goto out;
1640
+ }
1641
+ static_key_enable(&balloon_stat_enabled.key);
1642
+out:
1643
+ up_write(&b->conf_sem);
1644
+ return r;
1645
+}
1646
+
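vmballoon_enable_stats() is a double-checked lazy initializer: the debugfs reader tests b->stats locklessly, and the test is repeated under the write lock before allocating. The bare pattern (illustrative names):

#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_stats { u64 counter; };

static DECLARE_RWSEM(example_sem);
static struct example_stats *example_stats;

static int example_enable_stats(void)
{
	int r = 0;

	down_write(&example_sem);

	/* Re-check under the lock: a racing caller may have won. */
	if (example_stats)
		goto out;

	example_stats = kzalloc(sizeof(*example_stats), GFP_KERNEL);
	if (!example_stats)
		r = -ENOMEM;
out:
	up_write(&example_sem);
	return r;
}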
1647
+/**
1648
+ * vmballoon_debug_show - shows statistics of balloon operations.
1649
+ * @f: pointer to the &struct seq_file.
1650
+ * @offset: ignored.
1651
+ *
1652
+ * Provides the statistics that can be accessed via the vmmemctl debugfs file.
1653
+ * To avoid the overhead (mainly memory) of collecting the statistics, we only
1654
+ * start collecting them after the first time the counters are read.
1655
+ *
1656
+ * Return: zero on success or an error code.
1657
+ */
10721658 static int vmballoon_debug_show(struct seq_file *f, void *offset)
10731659 {
10741660 struct vmballoon *b = f->private;
1075
- struct vmballoon_stats *stats = &b->stats;
1661
+ int i, j;
1662
+
1663
+ /* enables stats if they are disabled */
1664
+ if (!b->stats) {
1665
+ int r = vmballoon_enable_stats(b);
1666
+
1667
+ if (r)
1668
+ return r;
1669
+ }
10761670
10771671 /* format capabilities info */
1078
- seq_printf(f,
1079
- "balloon capabilities: %#4x\n"
1080
- "used capabilities: %#4lx\n"
1081
- "is resetting: %c\n",
1082
- VMW_BALLOON_CAPABILITIES, b->capabilities,
1083
- b->reset_required ? 'y' : 'n');
1672
+ seq_printf(f, "%-22s: %#16x\n", "balloon capabilities",
1673
+ VMW_BALLOON_CAPABILITIES);
1674
+ seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities);
1675
+ seq_printf(f, "%-22s: %16s\n", "is resetting",
1676
+ b->reset_required ? "y" : "n");
10841677
10851678 /* format size info */
1086
- seq_printf(f,
1087
- "target: %8d pages\n"
1088
- "current: %8d pages\n",
1089
- b->target, b->size);
1679
+ seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target));
1680
+ seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size));
10901681
1091
- seq_printf(f,
1092
- "\n"
1093
- "timer: %8u\n"
1094
- "doorbell: %8u\n"
1095
- "start: %8u (%4u failed)\n"
1096
- "guestType: %8u (%4u failed)\n"
1097
- "2m-lock: %8u (%4u failed)\n"
1098
- "lock: %8u (%4u failed)\n"
1099
- "2m-unlock: %8u (%4u failed)\n"
1100
- "unlock: %8u (%4u failed)\n"
1101
- "target: %8u (%4u failed)\n"
1102
- "prim2mAlloc: %8u (%4u failed)\n"
1103
- "primNoSleepAlloc: %8u (%4u failed)\n"
1104
- "primCanSleepAlloc: %8u (%4u failed)\n"
1105
- "prim2mFree: %8u\n"
1106
- "primFree: %8u\n"
1107
- "err2mAlloc: %8u\n"
1108
- "errAlloc: %8u\n"
1109
- "err2mFree: %8u\n"
1110
- "errFree: %8u\n"
1111
- "doorbellSet: %8u\n"
1112
- "doorbellUnset: %8u\n",
1113
- stats->timer,
1114
- stats->doorbell,
1115
- stats->start, stats->start_fail,
1116
- stats->guest_type, stats->guest_type_fail,
1117
- stats->lock[true], stats->lock_fail[true],
1118
- stats->lock[false], stats->lock_fail[false],
1119
- stats->unlock[true], stats->unlock_fail[true],
1120
- stats->unlock[false], stats->unlock_fail[false],
1121
- stats->target, stats->target_fail,
1122
- stats->alloc[true], stats->alloc_fail[true],
1123
- stats->alloc[false], stats->alloc_fail[false],
1124
- stats->sleep_alloc, stats->sleep_alloc_fail,
1125
- stats->free[true],
1126
- stats->free[false],
1127
- stats->refused_alloc[true], stats->refused_alloc[false],
1128
- stats->refused_free[true], stats->refused_free[false],
1129
- stats->doorbell_set, stats->doorbell_unset);
1682
+ for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
1683
+ if (vmballoon_cmd_names[i] == NULL)
1684
+ continue;
11301685
1131
- return 0;
1132
-}
1686
+ seq_printf(f, "%-22s: %16llu (%llu failed)\n",
1687
+ vmballoon_cmd_names[i],
1688
+ atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
1689
+ atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
1690
+ }
11331691
1134
-static int vmballoon_debug_open(struct inode *inode, struct file *file)
1135
-{
1136
- return single_open(file, vmballoon_debug_show, inode->i_private);
1137
-}
1692
+ for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
1693
+ seq_printf(f, "%-22s: %16llu\n",
1694
+ vmballoon_stat_names[i],
1695
+ atomic64_read(&b->stats->general_stat[i]));
11381696
1139
-static const struct file_operations vmballoon_debug_fops = {
1140
- .owner = THIS_MODULE,
1141
- .open = vmballoon_debug_open,
1142
- .read = seq_read,
1143
- .llseek = seq_lseek,
1144
- .release = single_release,
1145
-};
1146
-
1147
-static int __init vmballoon_debugfs_init(struct vmballoon *b)
1148
-{
1149
- int error;
1150
-
1151
- b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
1152
- &vmballoon_debug_fops);
1153
- if (IS_ERR(b->dbg_entry)) {
1154
- error = PTR_ERR(b->dbg_entry);
1155
- pr_err("failed to create debugfs entry, error: %d\n", error);
1156
- return error;
1697
+ for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
1698
+ for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
1699
+ seq_printf(f, "%-18s(%s): %16llu\n",
1700
+ vmballoon_stat_page_names[i],
1701
+ vmballoon_page_size_names[j],
1702
+ atomic64_read(&b->stats->page_stat[i][j]));
11571703 }
11581704
11591705 return 0;
11601706 }
11611707
1708
+DEFINE_SHOW_ATTRIBUTE(vmballoon_debug);
1709
+
1710
+static void __init vmballoon_debugfs_init(struct vmballoon *b)
1711
+{
1712
+ b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
1713
+ &vmballoon_debug_fops);
1714
+}
1715
+
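DEFINE_SHOW_ATTRIBUTE() is what allows the hand-rolled open callback and file_operations above to be deleted: given a function named <name>_show, the macro generates <name>_open and a complete <name>_fops wired through single_open(). The pattern in isolation (illustrative name):

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *f, void *offset)
{
	seq_puts(f, "example output\n");
	return 0;
}

/* Generates example_open() and example_fops for us. */
DEFINE_SHOW_ATTRIBUTE(example);

/* Usage:
 *	debugfs_create_file("example", 0444, NULL, NULL, &example_fops);
 */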
11621716 static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
11631717 {
1718
+ static_key_disable(&balloon_stat_enabled.key);
11641719 debugfs_remove(b->dbg_entry);
1720
+ kfree(b->stats);
1721
+ b->stats = NULL;
11651722 }
11661723
11671724 #else
11681725
1169
-static inline int vmballoon_debugfs_init(struct vmballoon *b)
1726
+static inline void vmballoon_debugfs_init(struct vmballoon *b)
11701727 {
1171
- return 0;
11721728 }
11731729
11741730 static inline void vmballoon_debugfs_exit(struct vmballoon *b)
....@@ -1177,10 +1733,199 @@
11771733
11781734 #endif /* CONFIG_DEBUG_FS */
11791735
1736
+
1737
+#ifdef CONFIG_BALLOON_COMPACTION
1738
+
1739
+static int vmballoon_init_fs_context(struct fs_context *fc)
1740
+{
1741
+ return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
1742
+}
1743
+
1744
+static struct file_system_type vmballoon_fs = {
1745
+ .name = "balloon-vmware",
1746
+ .init_fs_context = vmballoon_init_fs_context,
1747
+ .kill_sb = kill_anon_super,
1748
+};
1749
+
1750
+static struct vfsmount *vmballoon_mnt;
1751
+
1752
+/**
1753
+ * vmballoon_migratepage() - migrates a balloon page.
1754
+ * @b_dev_info: balloon device information descriptor.
1755
+ * @newpage: the page to which @page should be migrated.
1756
+ * @page: a ballooned page that should be migrated.
1757
+ * @mode: migration mode, ignored.
1758
+ *
1759
+ * This function is really open-coded, but that is according to the interface
1760
+ * that balloon_compaction provides.
1761
+ *
1762
+ * Return: zero on success, -EAGAIN when migration cannot be performed
1763
+ * momentarily, and -EBUSY if migration failed and should be retried
1764
+ * with that specific page.
1765
+ */
1766
+static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
1767
+ struct page *newpage, struct page *page,
1768
+ enum migrate_mode mode)
1769
+{
1770
+ unsigned long status, flags;
1771
+ struct vmballoon *b;
1772
+ int ret;
1773
+
1774
+ b = container_of(b_dev_info, struct vmballoon, b_dev_info);
1775
+
1776
+ /*
1777
+ * If the semaphore is taken, there is ongoing configuration change
1778
+ * (i.e., balloon reset), so try again.
1779
+ */
1780
+ if (!down_read_trylock(&b->conf_sem))
1781
+ return -EAGAIN;
1782
+
1783
+ spin_lock(&b->comm_lock);
1784
+ /*
1785
+ * We must start by deflating and not inflating, as otherwise the
1786
+ * hypervisor may tell us that it has enough memory and the new page is
1787
+ * not needed. Since the old page is isolated, we cannot use the list
1788
+ * interface to unlock it, as the LRU field is used for isolation.
1789
+ * Instead, we use the native interface directly.
1790
+ */
1791
+ vmballoon_add_page(b, 0, page);
1792
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
1793
+ VMW_BALLOON_DEFLATE);
1794
+
1795
+ if (status == VMW_BALLOON_SUCCESS)
1796
+ status = vmballoon_status_page(b, 0, &page);
1797
+
1798
+ /*
1799
+ * If a failure happened, let the migration mechanism know that it
1800
+ * should not retry.
1801
+ */
1802
+ if (status != VMW_BALLOON_SUCCESS) {
1803
+ spin_unlock(&b->comm_lock);
1804
+ ret = -EBUSY;
1805
+ goto out_unlock;
1806
+ }
1807
+
1808
+ /*
1809
+ * The page is isolated, so it is safe to delete it without holding
1810
+ * @pages_lock . We keep holding @comm_lock since we will need it in a
1811
+ * second.
1812
+ */
1813
+ balloon_page_delete(page);
1814
+
1815
+ put_page(page);
1816
+
1817
+ /* Inflate */
1818
+ vmballoon_add_page(b, 0, newpage);
1819
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
1820
+ VMW_BALLOON_INFLATE);
1821
+
1822
+ if (status == VMW_BALLOON_SUCCESS)
1823
+ status = vmballoon_status_page(b, 0, &newpage);
1824
+
1825
+ spin_unlock(&b->comm_lock);
1826
+
1827
+ if (status != VMW_BALLOON_SUCCESS) {
1828
+ /*
1829
+ * A failure happened. While we can deflate the page we just
1830
+ * inflated, this deflation can also encounter an error. Instead
1831
+ * we will decrease the size of the balloon to reflect the
1832
+ * change and report failure.
1833
+ */
1834
+ atomic64_dec(&b->size);
1835
+ ret = -EBUSY;
1836
+ } else {
1837
+ /*
1838
+ * Success. Take a reference for the page, and we will add it to
1839
+ * the list after acquiring the lock.
1840
+ */
1841
+ get_page(newpage);
1842
+ ret = MIGRATEPAGE_SUCCESS;
1843
+ }
1844
+
1845
+ /* Update the balloon list under the @pages_lock */
1846
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
1847
+
1848
+ /*
1849
+ * On inflation success, we already took a reference for the @newpage.
1850
+ * If we succeed just insert it to the list and update the statistics
1851
+ * under the lock.
1852
+ */
1853
+ if (ret == MIGRATEPAGE_SUCCESS) {
1854
+ balloon_page_insert(&b->b_dev_info, newpage);
1855
+ __count_vm_event(BALLOON_MIGRATE);
1856
+ }
1857
+
1858
+ /*
1859
+ * We deflated successfully, so regardless to the inflation success, we
1860
+ * need to reduce the number of isolated_pages.
1861
+ */
1862
+ b->b_dev_info.isolated_pages--;
1863
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
1864
+
1865
+out_unlock:
1866
+ up_read(&b->conf_sem);
1867
+ return ret;
1868
+}
1869
+
1870
+/**
1871
+ * vmballoon_compaction_deinit() - removes compaction related data.
1872
+ *
1873
+ * @b: pointer to the balloon.
1874
+ */
1875
+static void vmballoon_compaction_deinit(struct vmballoon *b)
1876
+{
1877
+ if (!IS_ERR(b->b_dev_info.inode))
1878
+ iput(b->b_dev_info.inode);
1879
+
1880
+ b->b_dev_info.inode = NULL;
1881
+ kern_unmount(vmballoon_mnt);
1882
+ vmballoon_mnt = NULL;
1883
+}
1884
+
1885
+/**
1886
+ * vmballoon_compaction_init() - initialized compaction for the balloon.
1887
+ *
1888
+ * @b: pointer to the balloon.
1889
+ *
1890
+ * If during the initialization a failure occurred, this function does not
1891
+ * perform cleanup. The caller must call vmballoon_compaction_deinit() in this
1892
+ * case.
1893
+ *
1894
+ * Return: zero on success or error code on failure.
1895
+ */
1896
+static __init int vmballoon_compaction_init(struct vmballoon *b)
1897
+{
1898
+ vmballoon_mnt = kern_mount(&vmballoon_fs);
1899
+ if (IS_ERR(vmballoon_mnt))
1900
+ return PTR_ERR(vmballoon_mnt);
1901
+
1902
+ b->b_dev_info.migratepage = vmballoon_migratepage;
1903
+ b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
1904
+
1905
+ if (IS_ERR(b->b_dev_info.inode))
1906
+ return PTR_ERR(b->b_dev_info.inode);
1907
+
1908
+ b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
1909
+ return 0;
1910
+}
1911
+
1912
+#else /* CONFIG_BALLOON_COMPACTION */
1913
+
1914
+static void vmballoon_compaction_deinit(struct vmballoon *b)
1915
+{
1916
+}
1917
+
1918
+static int vmballoon_compaction_init(struct vmballoon *b)
1919
+{
1920
+ return 0;
1921
+}
1922
+
1923
+#endif /* CONFIG_BALLOON_COMPACTION */
1924
+
11801925 static int __init vmballoon_init(void)
11811926 {
11821927 int error;
1183
- unsigned is_2m_pages;
1928
+
11841929 /*
11851930 * Check if we are running on VMware's hypervisor and bail out
11861931 * if we are not.
....@@ -1188,18 +1933,24 @@
11881933 if (x86_hyper_type != X86_HYPER_VMWARE)
11891934 return -ENODEV;
11901935
1191
- for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
1192
- is_2m_pages++) {
1193
- INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
1194
- INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
1195
- }
1196
-
11971936 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
11981937
1199
- error = vmballoon_debugfs_init(&balloon);
1938
+ error = vmballoon_register_shrinker(&balloon);
12001939 if (error)
1201
- return error;
1940
+ goto fail;
12021941
1942
+ /*
1943
+ * Initialization of compaction must be done after the call to
1944
+	 * balloon_devinfo_init().
1945
+ */
1946
+ balloon_devinfo_init(&balloon.b_dev_info);
1947
+ error = vmballoon_compaction_init(&balloon);
1948
+ if (error)
1949
+ goto fail;
1950
+
1951
+ INIT_LIST_HEAD(&balloon.huge_pages);
1952
+ spin_lock_init(&balloon.comm_lock);
1953
+ init_rwsem(&balloon.conf_sem);
12031954 balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
12041955 balloon.batch_page = NULL;
12051956 balloon.page = NULL;
....@@ -1207,7 +1958,13 @@
12071958
12081959 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
12091960
1961
+ vmballoon_debugfs_init(&balloon);
1962
+
12101963 return 0;
1964
+fail:
1965
+ vmballoon_unregister_shrinker(&balloon);
1966
+ vmballoon_compaction_deinit(&balloon);
1967
+ return error;
12111968 }
12121969
12131970 /*
....@@ -1220,6 +1977,7 @@
12201977
12211978 static void __exit vmballoon_exit(void)
12221979 {
1980
+ vmballoon_unregister_shrinker(&balloon);
12231981 vmballoon_vmci_cleanup(&balloon);
12241982 cancel_delayed_work_sync(&balloon.dwork);
12251983
....@@ -1232,5 +1990,8 @@
12321990 */
12331991 vmballoon_send_start(&balloon, 0);
12341992 vmballoon_pop(&balloon);
1993
+
1994
+ /* Only once we popped the balloon, compaction can be deinit */
1995
+ vmballoon_compaction_deinit(&balloon);
12351996 }
12361997 module_exit(vmballoon_exit);