.. | .. |
---|
17 | 17 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
---|
18 | 18 | |
---|
19 | 19 | #include <linux/types.h> |
---|
| 20 | +#include <linux/io.h> |
---|
20 | 21 | #include <linux/kernel.h> |
---|
21 | 22 | #include <linux/mm.h> |
---|
22 | 23 | #include <linux/vmalloc.h> |
---|
.. | .. |
---|
25 | 26 | #include <linux/workqueue.h> |
---|
26 | 27 | #include <linux/debugfs.h> |
---|
27 | 28 | #include <linux/seq_file.h> |
---|
| 29 | +#include <linux/rwsem.h> |
---|
| 30 | +#include <linux/slab.h> |
---|
| 31 | +#include <linux/spinlock.h> |
---|
| 32 | +#include <linux/mount.h> |
---|
| 33 | +#include <linux/pseudo_fs.h> |
---|
| 34 | +#include <linux/balloon_compaction.h> |
---|
28 | 35 | #include <linux/vmw_vmci_defs.h> |
---|
29 | 36 | #include <linux/vmw_vmci_api.h> |
---|
30 | 37 | #include <asm/hypervisor.h> |
---|
31 | 38 | |
---|
32 | 39 | MODULE_AUTHOR("VMware, Inc."); |
---|
33 | 40 | MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); |
---|
34 | | -MODULE_VERSION("1.5.0.0-k"); |
---|
35 | 41 | MODULE_ALIAS("dmi:*:svnVMware*:*"); |
---|
36 | 42 | MODULE_ALIAS("vmware_vmmemctl"); |
---|
37 | 43 | MODULE_LICENSE("GPL"); |
---|
38 | 44 | |
---|
39 | | -/* |
---|
40 | | - * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't |
---|
41 | | - * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use |
---|
42 | | - * __GFP_NOWARN, to suppress page allocation failure warnings. |
---|
43 | | - */ |
---|
44 | | -#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) |
---|
| 45 | +static bool __read_mostly vmwballoon_shrinker_enable; |
---|
| 46 | +module_param(vmwballoon_shrinker_enable, bool, 0444); |
---|
| 47 | +MODULE_PARM_DESC(vmwballoon_shrinker_enable, |
---|
| 48 | + "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance."); |
---|
45 | 49 | |
---|
46 | | -/* |
---|
47 | | - * Use GFP_HIGHUSER when executing in a separate kernel thread |
---|
48 | | - * context and allocation can sleep. This is less stressful to |
---|
49 | | - * the guest memory system, since it allows the thread to block |
---|
50 | | - * while memory is reclaimed, and won't take pages from emergency |
---|
51 | | - * low-memory pools. |
---|
52 | | - */ |
---|
53 | | -#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER) |
---|
| 50 | +/* Delay in seconds after shrink before inflation. */ |
---|
| 51 | +#define VMBALLOON_SHRINK_DELAY (5) |
---|
54 | 52 | |
---|
55 | 53 | /* Maximum number of refused pages we accumulate during inflation cycle */ |
---|
56 | 54 | #define VMW_BALLOON_MAX_REFUSED 16 |
---|
| 55 | + |
---|
| 56 | +/* Magic number for the balloon mount-point */ |
---|
| 57 | +#define BALLOON_VMW_MAGIC 0x0ba11007 |
---|
57 | 58 | |
---|
58 | 59 | /* |
---|
59 | 60 | * Hypervisor communication port definitions. |
---|
.. | .. |
---|
70 | 71 | VMW_BALLOON_BATCHED_CMDS = (1 << 2), |
---|
71 | 72 | VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), |
---|
72 | 73 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), |
---|
| 74 | + VMW_BALLOON_64_BIT_TARGET = (1 << 5) |
---|
73 | 75 | }; |
---|
74 | 76 | |
---|
75 | | -#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \ |
---|
| 77 | +#define VMW_BALLOON_CAPABILITIES_COMMON (VMW_BALLOON_BASIC_CMDS \ |
---|
76 | 78 | | VMW_BALLOON_BATCHED_CMDS \ |
---|
77 | 79 | | VMW_BALLOON_BATCHED_2M_CMDS \ |
---|
78 | 80 | | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) |
---|
79 | 81 | |
---|
80 | | -#define VMW_BALLOON_2M_SHIFT (9) |
---|
81 | | -#define VMW_BALLOON_NUM_PAGE_SIZES (2) |
---|
| 82 | +#define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT) |
---|
82 | 83 | |
---|
83 | 84 | /* |
---|
84 | | - * Backdoor commands availability: |
---|
85 | | - * |
---|
86 | | - * START, GET_TARGET and GUEST_ID are always available, |
---|
87 | | - * |
---|
88 | | - * VMW_BALLOON_BASIC_CMDS: |
---|
89 | | - * LOCK and UNLOCK commands, |
---|
90 | | - * VMW_BALLOON_BATCHED_CMDS: |
---|
91 | | - * BATCHED_LOCK and BATCHED_UNLOCK commands. |
---|
92 | | - * VMW BALLOON_BATCHED_2M_CMDS: |
---|
93 | | - * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands, |
---|
94 | | - * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD: |
---|
95 | | - * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command. |
---|
| 85 | + * 64-bit targets are only supported in 64-bit |
---|
96 | 86 | */ |
---|
97 | | -#define VMW_BALLOON_CMD_START 0 |
---|
98 | | -#define VMW_BALLOON_CMD_GET_TARGET 1 |
---|
99 | | -#define VMW_BALLOON_CMD_LOCK 2 |
---|
100 | | -#define VMW_BALLOON_CMD_UNLOCK 3 |
---|
101 | | -#define VMW_BALLOON_CMD_GUEST_ID 4 |
---|
102 | | -#define VMW_BALLOON_CMD_BATCHED_LOCK 6 |
---|
103 | | -#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7 |
---|
104 | | -#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8 |
---|
105 | | -#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9 |
---|
106 | | -#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10 |
---|
| 87 | +#ifdef CONFIG_64BIT |
---|
| 88 | +#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_CAPABILITIES_COMMON \ |
---|
| 89 | + | VMW_BALLOON_64_BIT_TARGET) |
---|
| 90 | +#else |
---|
| 91 | +#define VMW_BALLOON_CAPABILITIES VMW_BALLOON_CAPABILITIES_COMMON |
---|
| 92 | +#endif |
---|
107 | 93 | |
---|
| 94 | +enum vmballoon_page_size_type { |
---|
| 95 | + VMW_BALLOON_4K_PAGE, |
---|
| 96 | + VMW_BALLOON_2M_PAGE, |
---|
| 97 | + VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE |
---|
| 98 | +}; |
---|
108 | 99 | |
---|
109 | | -/* error codes */ |
---|
110 | | -#define VMW_BALLOON_SUCCESS 0 |
---|
111 | | -#define VMW_BALLOON_FAILURE -1 |
---|
112 | | -#define VMW_BALLOON_ERROR_CMD_INVALID 1 |
---|
113 | | -#define VMW_BALLOON_ERROR_PPN_INVALID 2 |
---|
114 | | -#define VMW_BALLOON_ERROR_PPN_LOCKED 3 |
---|
115 | | -#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4 |
---|
116 | | -#define VMW_BALLOON_ERROR_PPN_PINNED 5 |
---|
117 | | -#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6 |
---|
118 | | -#define VMW_BALLOON_ERROR_RESET 7 |
---|
119 | | -#define VMW_BALLOON_ERROR_BUSY 8 |
---|
| 100 | +#define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1) |
---|
| 101 | + |
---|
| 102 | +static const char * const vmballoon_page_size_names[] = { |
---|
| 103 | + [VMW_BALLOON_4K_PAGE] = "4k", |
---|
| 104 | + [VMW_BALLOON_2M_PAGE] = "2M" |
---|
| 105 | +}; |
---|
| 106 | + |
---|
| 107 | +enum vmballoon_op { |
---|
| 108 | + VMW_BALLOON_INFLATE, |
---|
| 109 | + VMW_BALLOON_DEFLATE |
---|
| 110 | +}; |
---|
| 111 | + |
---|
| 112 | +enum vmballoon_op_stat_type { |
---|
| 113 | + VMW_BALLOON_OP_STAT, |
---|
| 114 | + VMW_BALLOON_OP_FAIL_STAT |
---|
| 115 | +}; |
---|
| 116 | + |
---|
| 117 | +#define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1) |
---|
| 118 | + |
---|
| 119 | +/** |
---|
| 120 | + * enum vmballoon_cmd_type - backdoor commands. |
---|
| 121 | + * |
---|
| 122 | + * Availability of the commands is as followed: |
---|
| 123 | + * |
---|
| 124 | + * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and |
---|
| 125 | + * %VMW_BALLOON_CMD_GUEST_ID are always available. |
---|
| 126 | + * |
---|
| 127 | + * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then |
---|
| 128 | + * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available. |
---|
| 129 | + * |
---|
| 130 | + * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then |
---|
| 131 | + * %VMW_BALLOON_CMD_BATCHED_LOCK and %VMW_BALLOON_CMD_BATCHED_UNLOCK commands |
---|
| 132 | + * are available. |
---|
| 133 | + * |
---|
| 134 | + * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then |
---|
| 135 | + * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK |
---|
| 136 | + * are supported. |
---|
| 137 | + * |
---|
| 138 | + * If the host reports %VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then |
---|
| 139 | + * %VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported. |
---|
| 140 | + * |
---|
| 141 | + * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor. |
---|
| 142 | + * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size. |
---|
| 143 | + * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page. |
---|
| 144 | + * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about |
---|
| 145 | + * to be deflated from the balloon. |
---|
| 146 | + * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that |
---|
| 147 | + * runs in the VM. |
---|
| 148 | + * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of |
---|
| 149 | + * ballooned pages (up to 512). |
---|
| 150 | + * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of |
---|
| 151 | + * pages that are about to be deflated from the |
---|
| 152 | + * balloon (up to 512). |
---|
| 153 | + * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK |
---|
| 154 | + * for 2MB pages. |
---|
| 155 | + * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to |
---|
| 156 | + * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB |
---|
| 157 | + * pages. |
---|
| 158 | + * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification |
---|
| 159 | + * that would be invoked when the balloon |
---|
| 160 | + * size changes. |
---|
| 161 | + * @VMW_BALLOON_CMD_LAST: Value of the last command. |
---|
| 162 | + */ |
---|
| 163 | +enum vmballoon_cmd_type { |
---|
| 164 | + VMW_BALLOON_CMD_START, |
---|
| 165 | + VMW_BALLOON_CMD_GET_TARGET, |
---|
| 166 | + VMW_BALLOON_CMD_LOCK, |
---|
| 167 | + VMW_BALLOON_CMD_UNLOCK, |
---|
| 168 | + VMW_BALLOON_CMD_GUEST_ID, |
---|
| 169 | + /* No command 5 */ |
---|
| 170 | + VMW_BALLOON_CMD_BATCHED_LOCK = 6, |
---|
| 171 | + VMW_BALLOON_CMD_BATCHED_UNLOCK, |
---|
| 172 | + VMW_BALLOON_CMD_BATCHED_2M_LOCK, |
---|
| 173 | + VMW_BALLOON_CMD_BATCHED_2M_UNLOCK, |
---|
| 174 | + VMW_BALLOON_CMD_VMCI_DOORBELL_SET, |
---|
| 175 | + VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET, |
---|
| 176 | +}; |
---|
| 177 | + |
---|
| 178 | +#define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1) |
---|
| 179 | + |
---|
| 180 | +enum vmballoon_error_codes { |
---|
| 181 | + VMW_BALLOON_SUCCESS, |
---|
| 182 | + VMW_BALLOON_ERROR_CMD_INVALID, |
---|
| 183 | + VMW_BALLOON_ERROR_PPN_INVALID, |
---|
| 184 | + VMW_BALLOON_ERROR_PPN_LOCKED, |
---|
| 185 | + VMW_BALLOON_ERROR_PPN_UNLOCKED, |
---|
| 186 | + VMW_BALLOON_ERROR_PPN_PINNED, |
---|
| 187 | + VMW_BALLOON_ERROR_PPN_NOTNEEDED, |
---|
| 188 | + VMW_BALLOON_ERROR_RESET, |
---|
| 189 | + VMW_BALLOON_ERROR_BUSY |
---|
| 190 | +}; |
---|
120 | 191 | |
---|
121 | 192 | #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) |
---|
122 | 193 | |
---|
123 | | -/* Batch page description */ |
---|
| 194 | +#define VMW_BALLOON_CMD_WITH_TARGET_MASK \ |
---|
| 195 | + ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \ |
---|
| 196 | + (1UL << VMW_BALLOON_CMD_LOCK) | \ |
---|
| 197 | + (1UL << VMW_BALLOON_CMD_UNLOCK) | \ |
---|
| 198 | + (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \ |
---|
| 199 | + (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \ |
---|
| 200 | + (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \ |
---|
| 201 | + (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK)) |
---|
124 | 202 | |
---|
125 | | -/* |
---|
126 | | - * Layout of a page in the batch page: |
---|
127 | | - * |
---|
128 | | - * +-------------+----------+--------+ |
---|
129 | | - * | | | | |
---|
130 | | - * | Page number | Reserved | Status | |
---|
131 | | - * | | | | |
---|
132 | | - * +-------------+----------+--------+ |
---|
133 | | - * 64 PAGE_SHIFT 6 0 |
---|
134 | | - * |
---|
135 | | - * The reserved field should be set to 0. |
---|
136 | | - */ |
---|
137 | | -#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64)) |
---|
138 | | -#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1) |
---|
139 | | -#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1)) |
---|
140 | | - |
---|
141 | | -struct vmballoon_batch_page { |
---|
142 | | - u64 pages[VMW_BALLOON_BATCH_MAX_PAGES]; |
---|
| 203 | +static const char * const vmballoon_cmd_names[] = { |
---|
| 204 | + [VMW_BALLOON_CMD_START] = "start", |
---|
| 205 | + [VMW_BALLOON_CMD_GET_TARGET] = "target", |
---|
| 206 | + [VMW_BALLOON_CMD_LOCK] = "lock", |
---|
| 207 | + [VMW_BALLOON_CMD_UNLOCK] = "unlock", |
---|
| 208 | + [VMW_BALLOON_CMD_GUEST_ID] = "guestType", |
---|
| 209 | + [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock", |
---|
| 210 | + [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock", |
---|
| 211 | + [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock", |
---|
| 212 | + [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock", |
---|
| 213 | + [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet" |
---|
143 | 214 | }; |
---|
144 | 215 | |
---|
145 | | -static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx) |
---|
146 | | -{ |
---|
147 | | - return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK; |
---|
148 | | -} |
---|
149 | | - |
---|
150 | | -static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch, |
---|
151 | | - int idx) |
---|
152 | | -{ |
---|
153 | | - return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK); |
---|
154 | | -} |
---|
155 | | - |
---|
156 | | -static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx, |
---|
157 | | - u64 pa) |
---|
158 | | -{ |
---|
159 | | - batch->pages[idx] = pa; |
---|
160 | | -} |
---|
161 | | - |
---|
162 | | - |
---|
163 | | -#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \ |
---|
164 | | -({ \ |
---|
165 | | - unsigned long __status, __dummy1, __dummy2, __dummy3; \ |
---|
166 | | - __asm__ __volatile__ ("inl %%dx" : \ |
---|
167 | | - "=a"(__status), \ |
---|
168 | | - "=c"(__dummy1), \ |
---|
169 | | - "=d"(__dummy2), \ |
---|
170 | | - "=b"(result), \ |
---|
171 | | - "=S" (__dummy3) : \ |
---|
172 | | - "0"(VMW_BALLOON_HV_MAGIC), \ |
---|
173 | | - "1"(VMW_BALLOON_CMD_##cmd), \ |
---|
174 | | - "2"(VMW_BALLOON_HV_PORT), \ |
---|
175 | | - "3"(arg1), \ |
---|
176 | | - "4" (arg2) : \ |
---|
177 | | - "memory"); \ |
---|
178 | | - if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \ |
---|
179 | | - result = __dummy1; \ |
---|
180 | | - result &= -1UL; \ |
---|
181 | | - __status & -1UL; \ |
---|
182 | | -}) |
---|
183 | | - |
---|
184 | | -#ifdef CONFIG_DEBUG_FS |
---|
185 | | -struct vmballoon_stats { |
---|
186 | | - unsigned int timer; |
---|
187 | | - unsigned int doorbell; |
---|
188 | | - |
---|
189 | | - /* allocation statistics */ |
---|
190 | | - unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
191 | | - unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
192 | | - unsigned int sleep_alloc; |
---|
193 | | - unsigned int sleep_alloc_fail; |
---|
194 | | - unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
195 | | - unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
196 | | - unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
197 | | - |
---|
198 | | - /* monitor operations */ |
---|
199 | | - unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
200 | | - unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
201 | | - unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
202 | | - unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
203 | | - unsigned int target; |
---|
204 | | - unsigned int target_fail; |
---|
205 | | - unsigned int start; |
---|
206 | | - unsigned int start_fail; |
---|
207 | | - unsigned int guest_type; |
---|
208 | | - unsigned int guest_type_fail; |
---|
209 | | - unsigned int doorbell_set; |
---|
210 | | - unsigned int doorbell_unset; |
---|
| 216 | +enum vmballoon_stat_page { |
---|
| 217 | + VMW_BALLOON_PAGE_STAT_ALLOC, |
---|
| 218 | + VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, |
---|
| 219 | + VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, |
---|
| 220 | + VMW_BALLOON_PAGE_STAT_REFUSED_FREE, |
---|
| 221 | + VMW_BALLOON_PAGE_STAT_FREE, |
---|
| 222 | + VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE |
---|
211 | 223 | }; |
---|
212 | 224 | |
---|
213 | | -#define STATS_INC(stat) (stat)++ |
---|
214 | | -#else |
---|
215 | | -#define STATS_INC(stat) |
---|
216 | | -#endif |
---|
| 225 | +#define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1) |
---|
217 | 226 | |
---|
218 | | -struct vmballoon; |
---|
219 | | - |
---|
220 | | -struct vmballoon_ops { |
---|
221 | | - void (*add_page)(struct vmballoon *b, int idx, struct page *p); |
---|
222 | | - int (*lock)(struct vmballoon *b, unsigned int num_pages, |
---|
223 | | - bool is_2m_pages, unsigned int *target); |
---|
224 | | - int (*unlock)(struct vmballoon *b, unsigned int num_pages, |
---|
225 | | - bool is_2m_pages, unsigned int *target); |
---|
| 227 | +enum vmballoon_stat_general { |
---|
| 228 | + VMW_BALLOON_STAT_TIMER, |
---|
| 229 | + VMW_BALLOON_STAT_DOORBELL, |
---|
| 230 | + VMW_BALLOON_STAT_RESET, |
---|
| 231 | + VMW_BALLOON_STAT_SHRINK, |
---|
| 232 | + VMW_BALLOON_STAT_SHRINK_FREE, |
---|
| 233 | + VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE |
---|
226 | 234 | }; |
---|
227 | 235 | |
---|
228 | | -struct vmballoon_page_size { |
---|
229 | | - /* list of reserved physical pages */ |
---|
| 236 | +#define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1) |
---|
| 237 | + |
---|
| 238 | +static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching); |
---|
| 239 | +static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled); |
---|
| 240 | + |
---|
| 241 | +struct vmballoon_ctl { |
---|
230 | 242 | struct list_head pages; |
---|
231 | | - |
---|
232 | | - /* transient list of non-balloonable pages */ |
---|
233 | 243 | struct list_head refused_pages; |
---|
| 244 | + struct list_head prealloc_pages; |
---|
234 | 245 | unsigned int n_refused_pages; |
---|
| 246 | + unsigned int n_pages; |
---|
| 247 | + enum vmballoon_page_size_type page_size; |
---|
| 248 | + enum vmballoon_op op; |
---|
235 | 249 | }; |
---|
| 250 | + |
---|
| 251 | +/** |
---|
| 252 | + * struct vmballoon_batch_entry - a batch entry for lock or unlock. |
---|
| 253 | + * |
---|
| 254 | + * @status: the status of the operation, which is written by the hypervisor. |
---|
| 255 | + * @reserved: reserved for future use. Must be set to zero. |
---|
| 256 | + * @pfn: the physical frame number of the page to be locked or unlocked. |
---|
| 257 | + */ |
---|
| 258 | +struct vmballoon_batch_entry { |
---|
| 259 | + u64 status : 5; |
---|
| 260 | + u64 reserved : PAGE_SHIFT - 5; |
---|
| 261 | + u64 pfn : 52; |
---|
| 262 | +} __packed; |
---|
236 | 263 | |
---|
237 | 264 | struct vmballoon { |
---|
238 | | - struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
| 265 | + /** |
---|
| 266 | + * @max_page_size: maximum supported page size for ballooning. |
---|
| 267 | + * |
---|
| 268 | + * Protected by @conf_sem |
---|
| 269 | + */ |
---|
| 270 | + enum vmballoon_page_size_type max_page_size; |
---|
239 | 271 | |
---|
240 | | - /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */ |
---|
241 | | - unsigned supported_page_sizes; |
---|
| 272 | + /** |
---|
| 273 | + * @size: balloon actual size in basic page size (frames). |
---|
| 274 | + * |
---|
| 275 | + * While we currently do not support size which is bigger than 32-bit, |
---|
| 276 | + * in preparation for future support, use 64-bits. |
---|
| 277 | + */ |
---|
| 278 | + atomic64_t size; |
---|
242 | 279 | |
---|
243 | | - /* balloon size in pages */ |
---|
244 | | - unsigned int size; |
---|
245 | | - unsigned int target; |
---|
| 280 | + /** |
---|
| 281 | + * @target: balloon target size in basic page size (frames). |
---|
| 282 | + * |
---|
| 283 | + * We do not protect the target under the assumption that setting the |
---|
| 284 | + * value is always done through a single write. If this assumption ever |
---|
| 285 | + * breaks, we would have to use X_ONCE for accesses, and suffer the less |
---|
| 286 | + * optimized code. Although we may read stale target value if multiple |
---|
| 287 | + * accesses happen at once, the performance impact should be minor. |
---|
| 288 | + */ |
---|
| 289 | + unsigned long target; |
---|
246 | 290 | |
---|
247 | | - /* reset flag */ |
---|
| 291 | + /** |
---|
| 292 | + * @reset_required: reset flag |
---|
| 293 | + * |
---|
| 294 | + * Setting this flag may introduce races, but the code is expected to |
---|
| 295 | + * handle them gracefully. In the worst case, another operation will |
---|
| 296 | + * fail as reset did not take place. Clearing the flag is done while |
---|
| 297 | + * holding @conf_sem for write. |
---|
| 298 | + */ |
---|
248 | 299 | bool reset_required; |
---|
249 | 300 | |
---|
| 301 | + /** |
---|
| 302 | + * @capabilities: hypervisor balloon capabilities. |
---|
| 303 | + * |
---|
| 304 | + * Protected by @conf_sem. |
---|
| 305 | + */ |
---|
250 | 306 | unsigned long capabilities; |
---|
251 | 307 | |
---|
252 | | - struct vmballoon_batch_page *batch_page; |
---|
| 308 | + /** |
---|
| 309 | + * @batch_page: pointer to communication batch page. |
---|
| 310 | + * |
---|
| 311 | + * When batching is used, batch_page points to a page, which holds up to |
---|
| 312 | + * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking. |
---|
| 313 | + */ |
---|
| 314 | + struct vmballoon_batch_entry *batch_page; |
---|
| 315 | + |
---|
| 316 | + /** |
---|
| 317 | + * @batch_max_pages: maximum pages that can be locked/unlocked. |
---|
| 318 | + * |
---|
| 319 | + * Indicates the number of pages that the hypervisor can lock or unlock |
---|
| 320 | + * at once, according to whether batching is enabled. If batching is |
---|
| 321 | + * disabled, only a single page can be locked/unlocked on each operation. |
---|
| 322 | + * |
---|
| 323 | + * Protected by @conf_sem. |
---|
| 324 | + */ |
---|
253 | 325 | unsigned int batch_max_pages; |
---|
| 326 | + |
---|
| 327 | + /** |
---|
| 328 | + * @page: page to be locked/unlocked by the hypervisor |
---|
| 329 | + * |
---|
| 330 | + * @page is only used when batching is disabled and a single page is |
---|
| 331 | + * reclaimed on each iteration. |
---|
| 332 | + * |
---|
| 333 | + * Protected by @comm_lock. |
---|
| 334 | + */ |
---|
254 | 335 | struct page *page; |
---|
255 | 336 | |
---|
256 | | - const struct vmballoon_ops *ops; |
---|
| 337 | + /** |
---|
| 338 | + * @shrink_timeout: timeout until the next inflation. |
---|
| 339 | + * |
---|
| 340 | + * After a shrink event, indicates the time in jiffies after which |
---|
| 341 | + * inflation is allowed again. Can be written concurrently with reads, |
---|
| 342 | + * so must use READ_ONCE/WRITE_ONCE when accessing. |
---|
| 343 | + */ |
---|
| 344 | + unsigned long shrink_timeout; |
---|
| 345 | + |
---|
| 346 | + /* statistics */ |
---|
| 347 | + struct vmballoon_stats *stats; |
---|
257 | 348 | |
---|
258 | 349 | #ifdef CONFIG_DEBUG_FS |
---|
259 | | - /* statistics */ |
---|
260 | | - struct vmballoon_stats stats; |
---|
261 | | - |
---|
262 | 350 | /* debugfs file exporting statistics */ |
---|
263 | 351 | struct dentry *dbg_entry; |
---|
264 | 352 | #endif |
---|
265 | 353 | |
---|
266 | | - struct sysinfo sysinfo; |
---|
| 354 | + /** |
---|
| 355 | + * @b_dev_info: balloon device information descriptor. |
---|
| 356 | + */ |
---|
| 357 | + struct balloon_dev_info b_dev_info; |
---|
267 | 358 | |
---|
268 | 359 | struct delayed_work dwork; |
---|
269 | 360 | |
---|
| 361 | + /** |
---|
| 362 | + * @huge_pages: list of the inflated 2MB pages. |
---|
| 363 | + * |
---|
| 364 | + * Protected by @b_dev_info.pages_lock . |
---|
| 365 | + */ |
---|
| 366 | + struct list_head huge_pages; |
---|
| 367 | + |
---|
| 368 | + /** |
---|
| 369 | + * @vmci_doorbell: doorbell handle used for notifications on balloon size changes. |
---|
| 370 | + * |
---|
| 371 | + * Protected by @conf_sem. |
---|
| 372 | + */ |
---|
270 | 373 | struct vmci_handle vmci_doorbell; |
---|
| 374 | + |
---|
| 375 | + /** |
---|
| 376 | + * @conf_sem: semaphore to protect the configuration and the statistics. |
---|
| 377 | + */ |
---|
| 378 | + struct rw_semaphore conf_sem; |
---|
| 379 | + |
---|
| 380 | + /** |
---|
| 381 | + * @comm_lock: lock to protect the communication with the host. |
---|
| 382 | + * |
---|
| 383 | + * Lock ordering: @conf_sem -> @comm_lock . |
---|
| 384 | + */ |
---|
| 385 | + spinlock_t comm_lock; |
---|
| 386 | + |
---|
| 387 | + /** |
---|
| 388 | + * @shrinker: shrinker interface that is used to avoid over-inflation. |
---|
| 389 | + */ |
---|
| 390 | + struct shrinker shrinker; |
---|
| 391 | + |
---|
| 392 | + /** |
---|
| 393 | + * @shrinker_registered: whether the shrinker was registered. |
---|
| 394 | + * |
---|
| 395 | + * The shrinker interface does not handle gracefully the removal of |
---|
| 396 | + * shrinker that was not registered before. This indication allows to |
---|
| 397 | + * simplify the unregistration process. |
---|
| 398 | + */ |
---|
| 399 | + bool shrinker_registered; |
---|
271 | 400 | }; |
---|
272 | 401 | |
---|
273 | 402 | static struct vmballoon balloon; |
---|
| 403 | + |
---|
| 404 | +struct vmballoon_stats { |
---|
| 405 | + /* general statistics: timer, doorbell, reset and shrink events */ |
---|
| 406 | + atomic64_t general_stat[VMW_BALLOON_STAT_NUM]; |
---|
| 407 | + |
---|
| 408 | + /* allocation statistics for huge and small pages */ |
---|
| 409 | + atomic64_t |
---|
| 410 | + page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES]; |
---|
| 411 | + |
---|
| 412 | + /* Monitor operations: total operations, and failures */ |
---|
| 413 | + atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES]; |
---|
| 414 | +}; |
---|
| 415 | + |
---|
| 416 | +static inline bool is_vmballoon_stats_on(void) |
---|
| 417 | +{ |
---|
| 418 | + return IS_ENABLED(CONFIG_DEBUG_FS) && |
---|
| 419 | + static_branch_unlikely(&balloon_stat_enabled); |
---|
| 420 | +} |
---|
| 421 | + |
---|
| 422 | +static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op, |
---|
| 423 | + enum vmballoon_op_stat_type type) |
---|
| 424 | +{ |
---|
| 425 | + if (is_vmballoon_stats_on()) |
---|
| 426 | + atomic64_inc(&b->stats->ops[op][type]); |
---|
| 427 | +} |
---|
| 428 | + |
---|
| 429 | +static inline void vmballoon_stats_gen_inc(struct vmballoon *b, |
---|
| 430 | + enum vmballoon_stat_general stat) |
---|
| 431 | +{ |
---|
| 432 | + if (is_vmballoon_stats_on()) |
---|
| 433 | + atomic64_inc(&b->stats->general_stat[stat]); |
---|
| 434 | +} |
---|
| 435 | + |
---|
| 436 | +static inline void vmballoon_stats_gen_add(struct vmballoon *b, |
---|
| 437 | + enum vmballoon_stat_general stat, |
---|
| 438 | + unsigned int val) |
---|
| 439 | +{ |
---|
| 440 | + if (is_vmballoon_stats_on()) |
---|
| 441 | + atomic64_add(val, &b->stats->general_stat[stat]); |
---|
| 442 | +} |
---|
| 443 | + |
---|
| 444 | +static inline void vmballoon_stats_page_inc(struct vmballoon *b, |
---|
| 445 | + enum vmballoon_stat_page stat, |
---|
| 446 | + enum vmballoon_page_size_type size) |
---|
| 447 | +{ |
---|
| 448 | + if (is_vmballoon_stats_on()) |
---|
| 449 | + atomic64_inc(&b->stats->page_stat[stat][size]); |
---|
| 450 | +} |
---|
| 451 | + |
---|
| 452 | +static inline void vmballoon_stats_page_add(struct vmballoon *b, |
---|
| 453 | + enum vmballoon_stat_page stat, |
---|
| 454 | + enum vmballoon_page_size_type size, |
---|
| 455 | + unsigned int val) |
---|
| 456 | +{ |
---|
| 457 | + if (is_vmballoon_stats_on()) |
---|
| 458 | + atomic64_add(val, &b->stats->page_stat[stat][size]); |
---|
| 459 | +} |
---|
| 460 | + |
---|
| 461 | +static inline unsigned long |
---|
| 462 | +__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, |
---|
| 463 | + unsigned long arg2, unsigned long *result) |
---|
| 464 | +{ |
---|
| 465 | + unsigned long status, dummy1, dummy2, dummy3, local_result; |
---|
| 466 | + |
---|
| 467 | + vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT); |
---|
| 468 | + |
---|
| 469 | + asm volatile ("inl %%dx" : |
---|
| 470 | + "=a"(status), |
---|
| 471 | + "=c"(dummy1), |
---|
| 472 | + "=d"(dummy2), |
---|
| 473 | + "=b"(local_result), |
---|
| 474 | + "=S"(dummy3) : |
---|
| 475 | + "0"(VMW_BALLOON_HV_MAGIC), |
---|
| 476 | + "1"(cmd), |
---|
| 477 | + "2"(VMW_BALLOON_HV_PORT), |
---|
| 478 | + "3"(arg1), |
---|
| 479 | + "4"(arg2) : |
---|
| 480 | + "memory"); |
---|
| 481 | + |
---|
| 482 | + /* update the result if needed */ |
---|
| 483 | + if (result) |
---|
| 484 | + *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 : |
---|
| 485 | + local_result; |
---|
| 486 | + |
---|
| 487 | + /* update target when applicable */ |
---|
| 488 | + if (status == VMW_BALLOON_SUCCESS && |
---|
| 489 | + ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK)) |
---|
| 490 | + WRITE_ONCE(b->target, local_result); |
---|
| 491 | + |
---|
| 492 | + if (status != VMW_BALLOON_SUCCESS && |
---|
| 493 | + status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) { |
---|
| 494 | + vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT); |
---|
| 495 | + pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n", |
---|
| 496 | + __func__, vmballoon_cmd_names[cmd], arg1, arg2, |
---|
| 497 | + status); |
---|
| 498 | + } |
---|
| 499 | + |
---|
| 500 | + /* mark reset required accordingly */ |
---|
| 501 | + if (status == VMW_BALLOON_ERROR_RESET) |
---|
| 502 | + b->reset_required = true; |
---|
| 503 | + |
---|
| 504 | + return status; |
---|
| 505 | +} |
---|
| 506 | + |
---|
| 507 | +static __always_inline unsigned long |
---|
| 508 | +vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, |
---|
| 509 | + unsigned long arg2) |
---|
| 510 | +{ |
---|
| 511 | + unsigned long dummy; |
---|
| 512 | + |
---|
| 513 | + return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy); |
---|
| 514 | +} |
---|
274 | 515 | |
---|
275 | 516 | /* |
---|
276 | 517 | * Send "start" command to the host, communicating supported version |
---|
277 | 518 | * of the protocol. |
---|
278 | 519 | */ |
---|
279 | | -static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) |
---|
| 520 | +static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) |
---|
280 | 521 | { |
---|
281 | | - unsigned long status, capabilities, dummy = 0; |
---|
282 | | - bool success; |
---|
| 522 | + unsigned long status, capabilities; |
---|
283 | 523 | |
---|
284 | | - STATS_INC(b->stats.start); |
---|
285 | | - |
---|
286 | | - status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities); |
---|
| 524 | + status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0, |
---|
| 525 | + &capabilities); |
---|
287 | 526 | |
---|
288 | 527 | switch (status) { |
---|
289 | 528 | case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: |
---|
290 | 529 | b->capabilities = capabilities; |
---|
291 | | - success = true; |
---|
292 | 530 | break; |
---|
293 | 531 | case VMW_BALLOON_SUCCESS: |
---|
294 | 532 | b->capabilities = VMW_BALLOON_BASIC_CMDS; |
---|
295 | | - success = true; |
---|
296 | 533 | break; |
---|
297 | 534 | default: |
---|
298 | | - success = false; |
---|
| 535 | + return -EIO; |
---|
299 | 536 | } |
---|
300 | 537 | |
---|
301 | 538 | /* |
---|
.. | .. |
---|
303 | 540 | * reason disabled, do not use 2MB pages, since otherwise the legacy |
---|
304 | 541 | * mechanism is used with 2MB pages, causing a failure. |
---|
305 | 542 | */ |
---|
| 543 | + b->max_page_size = VMW_BALLOON_4K_PAGE; |
---|
306 | 544 | if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && |
---|
307 | 545 | (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) |
---|
308 | | - b->supported_page_sizes = 2; |
---|
309 | | - else |
---|
310 | | - b->supported_page_sizes = 1; |
---|
| 546 | + b->max_page_size = VMW_BALLOON_2M_PAGE; |
---|
311 | 547 | |
---|
312 | | - if (!success) { |
---|
313 | | - pr_debug("%s - failed, hv returns %ld\n", __func__, status); |
---|
314 | | - STATS_INC(b->stats.start_fail); |
---|
315 | | - } |
---|
316 | | - return success; |
---|
| 548 | + |
---|
| 549 | + return 0; |
---|
317 | 550 | } |
---|
318 | 551 | |
---|
319 | | -static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) |
---|
320 | | -{ |
---|
321 | | - switch (status) { |
---|
322 | | - case VMW_BALLOON_SUCCESS: |
---|
323 | | - return true; |
---|
324 | | - |
---|
325 | | - case VMW_BALLOON_ERROR_RESET: |
---|
326 | | - b->reset_required = true; |
---|
327 | | - /* fall through */ |
---|
328 | | - |
---|
329 | | - default: |
---|
330 | | - return false; |
---|
331 | | - } |
---|
332 | | -} |
---|
333 | | - |
---|
334 | | -/* |
---|
| 552 | +/** |
---|
| 553 | + * vmballoon_send_guest_id - communicate guest type to the host. |
---|
| 554 | + * |
---|
| 555 | + * @b: pointer to the balloon. |
---|
| 556 | + * |
---|
335 | 557 | * Communicate guest type to the host so that it can adjust ballooning |
---|
336 | 558 | * algorithm to the one most appropriate for the guest. This command |
---|
337 | 559 | * is normally issued after sending "start" command and is part of |
---|
338 | 560 | * standard reset sequence. |
---|
| 561 | + * |
---|
| 562 | + * Return: zero on success or appropriate error code. |
---|
339 | 563 | */ |
---|
340 | | -static bool vmballoon_send_guest_id(struct vmballoon *b) |
---|
341 | | -{ |
---|
342 | | - unsigned long status, dummy = 0; |
---|
343 | | - |
---|
344 | | - status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy, |
---|
345 | | - dummy); |
---|
346 | | - |
---|
347 | | - STATS_INC(b->stats.guest_type); |
---|
348 | | - |
---|
349 | | - if (vmballoon_check_status(b, status)) |
---|
350 | | - return true; |
---|
351 | | - |
---|
352 | | - pr_debug("%s - failed, hv returns %ld\n", __func__, status); |
---|
353 | | - STATS_INC(b->stats.guest_type_fail); |
---|
354 | | - return false; |
---|
355 | | -} |
---|
356 | | - |
---|
357 | | -static u16 vmballoon_page_size(bool is_2m_page) |
---|
358 | | -{ |
---|
359 | | - if (is_2m_page) |
---|
360 | | - return 1 << VMW_BALLOON_2M_SHIFT; |
---|
361 | | - |
---|
362 | | - return 1; |
---|
363 | | -} |
---|
364 | | - |
---|
365 | | -/* |
---|
366 | | - * Retrieve desired balloon size from the host. |
---|
367 | | - */ |
---|
368 | | -static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) |
---|
| 564 | +static int vmballoon_send_guest_id(struct vmballoon *b) |
---|
369 | 565 | { |
---|
370 | 566 | unsigned long status; |
---|
371 | | - unsigned long target; |
---|
372 | | - unsigned long limit; |
---|
373 | | - unsigned long dummy = 0; |
---|
374 | | - u32 limit32; |
---|
375 | 567 | |
---|
376 | | - /* |
---|
377 | | - * si_meminfo() is cheap. Moreover, we want to provide dynamic |
---|
378 | | - * max balloon size later. So let us call si_meminfo() every |
---|
379 | | - * iteration. |
---|
380 | | - */ |
---|
381 | | - si_meminfo(&b->sysinfo); |
---|
382 | | - limit = b->sysinfo.totalram; |
---|
| 568 | + status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID, |
---|
| 569 | + VMW_BALLOON_GUEST_ID, 0); |
---|
383 | 570 | |
---|
384 | | - /* Ensure limit fits in 32-bits */ |
---|
385 | | - limit32 = (u32)limit; |
---|
386 | | - if (limit != limit32) |
---|
387 | | - return false; |
---|
388 | | - |
---|
389 | | - /* update stats */ |
---|
390 | | - STATS_INC(b->stats.target); |
---|
391 | | - |
---|
392 | | - status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target); |
---|
393 | | - if (vmballoon_check_status(b, status)) { |
---|
394 | | - *new_target = target; |
---|
395 | | - return true; |
---|
396 | | - } |
---|
397 | | - |
---|
398 | | - pr_debug("%s - failed, hv returns %ld\n", __func__, status); |
---|
399 | | - STATS_INC(b->stats.target_fail); |
---|
400 | | - return false; |
---|
| 571 | + return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; |
---|
401 | 572 | } |
---|
402 | 573 | |
---|
403 | | -/* |
---|
404 | | - * Notify the host about allocated page so that host can use it without |
---|
405 | | - * fear that guest will need it. Host may reject some pages, we need to |
---|
406 | | - * check the return value and maybe submit a different page. |
---|
| 574 | +/** |
---|
| 575 | + * vmballoon_page_order() - return the order of the page |
---|
| 576 | + * @page_size: the size of the page. |
---|
| 577 | + * |
---|
| 578 | + * Return: the allocation order. |
---|
407 | 579 | */ |
---|
408 | | -static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, |
---|
409 | | - unsigned int *hv_status, unsigned int *target) |
---|
| 580 | +static inline |
---|
| 581 | +unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size) |
---|
410 | 582 | { |
---|
411 | | - unsigned long status, dummy = 0; |
---|
412 | | - u32 pfn32; |
---|
| 583 | + return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0; |
---|
| 584 | +} |
---|
413 | 585 | |
---|
414 | | - pfn32 = (u32)pfn; |
---|
415 | | - if (pfn32 != pfn) |
---|
| 586 | +/** |
---|
| 587 | + * vmballoon_page_in_frames() - returns the number of frames in a page. |
---|
| 588 | + * @page_size: the size of the page. |
---|
| 589 | + * |
---|
| 590 | + * Return: the number of 4k frames. |
---|
| 591 | + */ |
---|
| 592 | +static inline unsigned int |
---|
| 593 | +vmballoon_page_in_frames(enum vmballoon_page_size_type page_size) |
---|
| 594 | +{ |
---|
| 595 | + return 1 << vmballoon_page_order(page_size); |
---|
| 596 | +} |
---|
| 597 | + |
---|
| 598 | +/** |
---|
| 599 | + * vmballoon_mark_page_offline() - mark a page as offline |
---|
| 600 | + * @page: pointer for the page. |
---|
| 601 | + * @page_size: the size of the page. |
---|
| 602 | + */ |
---|
| 603 | +static void |
---|
| 604 | +vmballoon_mark_page_offline(struct page *page, |
---|
| 605 | + enum vmballoon_page_size_type page_size) |
---|
| 606 | +{ |
---|
| 607 | + int i; |
---|
| 608 | + |
---|
| 609 | + for (i = 0; i < vmballoon_page_in_frames(page_size); i++) |
---|
| 610 | + __SetPageOffline(page + i); |
---|
| 611 | +} |
---|
| 612 | + |
---|
| 613 | +/** |
---|
| 614 | + * vmballoon_mark_page_online() - mark a page as online |
---|
| 615 | + * @page: pointer for the page. |
---|
| 616 | + * @page_size: the size of the page. |
---|
| 617 | + */ |
---|
| 618 | +static void |
---|
| 619 | +vmballoon_mark_page_online(struct page *page, |
---|
| 620 | + enum vmballoon_page_size_type page_size) |
---|
| 621 | +{ |
---|
| 622 | + int i; |
---|
| 623 | + |
---|
| 624 | + for (i = 0; i < vmballoon_page_in_frames(page_size); i++) |
---|
| 625 | + __ClearPageOffline(page + i); |
---|
| 626 | +} |
---|
| 627 | + |
---|
| 628 | +/** |
---|
| 629 | + * vmballoon_send_get_target() - Retrieve desired balloon size from the host. |
---|
| 630 | + * |
---|
| 631 | + * @b: pointer to the balloon. |
---|
| 632 | + * |
---|
| 633 | + * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required |
---|
| 634 | + * by the host-guest protocol and EIO if an error occurred in communicating with |
---|
| 635 | + * the host. |
---|
| 636 | + */ |
---|
| 637 | +static int vmballoon_send_get_target(struct vmballoon *b) |
---|
| 638 | +{ |
---|
| 639 | + unsigned long status; |
---|
| 640 | + unsigned long limit; |
---|
| 641 | + |
---|
| 642 | + limit = totalram_pages(); |
---|
| 643 | + |
---|
| 644 | + /* Ensure limit fits in 32-bits if 64-bit targets are not supported */ |
---|
| 645 | + if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) && |
---|
| 646 | + limit != (u32)limit) |
---|
416 | 647 | return -EINVAL; |
---|
417 | 648 | |
---|
418 | | - STATS_INC(b->stats.lock[false]); |
---|
| 649 | + status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0); |
---|
419 | 650 | |
---|
420 | | - *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target); |
---|
421 | | - if (vmballoon_check_status(b, status)) |
---|
| 651 | + return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; |
---|
| 652 | +} |
---|
| 653 | + |
---|
| 654 | +/** |
---|
| 655 | + * vmballoon_alloc_page_list - allocates a list of pages. |
---|
| 656 | + * |
---|
| 657 | + * @b: pointer to the balloon. |
---|
| 658 | + * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. |
---|
| 659 | + * @req_n_pages: the number of requested pages. |
---|
| 660 | + * |
---|
| 661 | + * Tries to allocate @req_n_pages. Add them to the list of balloon pages in |
---|
| 662 | + * @ctl.pages and updates @ctl.n_pages to reflect the number of pages. |
---|
| 663 | + * |
---|
| 664 | + * Return: zero on success or error code otherwise. |
---|
| 665 | + */ |
---|
| 666 | +static int vmballoon_alloc_page_list(struct vmballoon *b, |
---|
| 667 | + struct vmballoon_ctl *ctl, |
---|
| 668 | + unsigned int req_n_pages) |
---|
| 669 | +{ |
---|
| 670 | + struct page *page; |
---|
| 671 | + unsigned int i; |
---|
| 672 | + |
---|
| 673 | + for (i = 0; i < req_n_pages; i++) { |
---|
| 674 | + /* |
---|
| 675 | + * First check if we happen to have pages that were allocated |
---|
| 676 | + * before. This happens when 2MB page rejected during inflation |
---|
| 677 | + * by the hypervisor, and then split into 4KB pages. |
---|
| 678 | + */ |
---|
| 679 | + if (!list_empty(&ctl->prealloc_pages)) { |
---|
| 680 | + page = list_first_entry(&ctl->prealloc_pages, |
---|
| 681 | + struct page, lru); |
---|
| 682 | + list_del(&page->lru); |
---|
| 683 | + } else { |
---|
| 684 | + if (ctl->page_size == VMW_BALLOON_2M_PAGE) |
---|
| 685 | + page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN| |
---|
| 686 | + __GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER); |
---|
| 687 | + else |
---|
| 688 | + page = balloon_page_alloc(); |
---|
| 689 | + |
---|
| 690 | + vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC, |
---|
| 691 | + ctl->page_size); |
---|
| 692 | + } |
---|
| 693 | + |
---|
| 694 | + if (page) { |
---|
| 695 | + /* Success. Add the page to the list and continue. */ |
---|
| 696 | + list_add(&page->lru, &ctl->pages); |
---|
| 697 | + continue; |
---|
| 698 | + } |
---|
| 699 | + |
---|
| 700 | + /* Allocation failed. Update statistics and stop. */ |
---|
| 701 | + vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, |
---|
| 702 | + ctl->page_size); |
---|
| 703 | + break; |
---|
| 704 | + } |
---|
| 705 | + |
---|
| 706 | + ctl->n_pages = i; |
---|
| 707 | + |
---|
| 708 | + return req_n_pages == ctl->n_pages ? 0 : -ENOMEM; |
---|
| 709 | +} |
---|
| 710 | + |
---|
| 711 | +/** |
---|
| 712 | + * vmballoon_handle_one_result - Handle lock/unlock result for a single page. |
---|
| 713 | + * |
---|
| 714 | + * @b: pointer for %struct vmballoon. |
---|
| 715 | + * @page: pointer for the page whose result should be handled. |
---|
| 716 | + * @page_size: size of the page. |
---|
| 717 | + * @status: status of the operation as provided by the hypervisor. |
---|
| 718 | + */ |
---|
| 719 | +static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page, |
---|
| 720 | + enum vmballoon_page_size_type page_size, |
---|
| 721 | + unsigned long status) |
---|
| 722 | +{ |
---|
| 723 | + /* On success do nothing. The page is already on the balloon list. */ |
---|
| 724 | + if (likely(status == VMW_BALLOON_SUCCESS)) |
---|
422 | 725 | return 0; |
---|
423 | 726 | |
---|
424 | | - pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); |
---|
425 | | - STATS_INC(b->stats.lock_fail[false]); |
---|
| 727 | + pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__, |
---|
| 728 | + page_to_pfn(page), status, |
---|
| 729 | + vmballoon_page_size_names[page_size]); |
---|
| 730 | + |
---|
| 731 | + /* Error occurred */ |
---|
| 732 | + vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, |
---|
| 733 | + page_size); |
---|
| 734 | + |
---|
426 | 735 | return -EIO; |
---|
427 | 736 | } |
---|
428 | 737 | |
---|
429 | | -static int vmballoon_send_batched_lock(struct vmballoon *b, |
---|
430 | | - unsigned int num_pages, bool is_2m_pages, unsigned int *target) |
---|
| 738 | +/** |
---|
| 739 | + * vmballoon_status_page - returns the status of (un)lock operation |
---|
| 740 | + * |
---|
| 741 | + * @b: pointer to the balloon. |
---|
| 742 | + * @idx: index for the page for which the operation is performed. |
---|
| 743 | + * @p: pointer to where the page struct is returned. |
---|
| 744 | + * |
---|
| 745 | + * Following a lock or unlock operation, returns the status of the operation for |
---|
| 746 | + * an individual page. Provides the page that the operation was performed on on |
---|
| 747 | + * the @page argument. |
---|
| 748 | + * |
---|
| 749 | + * Returns: The status of a lock or unlock operation for an individual page. |
---|
| 750 | + */ |
---|
| 751 | +static unsigned long vmballoon_status_page(struct vmballoon *b, int idx, |
---|
| 752 | + struct page **p) |
---|
431 | 753 | { |
---|
432 | | - unsigned long status; |
---|
433 | | - unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); |
---|
| 754 | + if (static_branch_likely(&vmw_balloon_batching)) { |
---|
| 755 | + /* batching mode */ |
---|
| 756 | + *p = pfn_to_page(b->batch_page[idx].pfn); |
---|
| 757 | + return b->batch_page[idx].status; |
---|
| 758 | + } |
---|
434 | 759 | |
---|
435 | | - STATS_INC(b->stats.lock[is_2m_pages]); |
---|
| 760 | + /* non-batching mode */ |
---|
| 761 | + *p = b->page; |
---|
436 | 762 | |
---|
437 | | - if (is_2m_pages) |
---|
438 | | - status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages, |
---|
439 | | - *target); |
---|
| 763 | + /* |
---|
| 764 | + * If a failure occurs, the indication will be provided in the status |
---|
| 765 | + * of the entire operation, which is considered before the individual |
---|
| 766 | + * page status. So for non-batching mode, the indication is always of |
---|
| 767 | + * success. |
---|
| 768 | + */ |
---|
| 769 | + return VMW_BALLOON_SUCCESS; |
---|
| 770 | +} |
---|
| 771 | + |
---|
| 772 | +/** |
---|
| 773 | + * vmballoon_lock_op - notifies the host about inflated/deflated pages. |
---|
| 774 | + * @b: pointer to the balloon. |
---|
| 775 | + * @num_pages: number of inflated/deflated pages. |
---|
| 776 | + * @page_size: size of the page. |
---|
| 777 | + * @op: the type of operation (lock or unlock). |
---|
| 778 | + * |
---|
| 779 | + * Notify the host about page(s) that were ballooned (or removed from the |
---|
| 780 | + * balloon) so that host can use it without fear that guest will need it (or |
---|
| 781 | + * stop using them since the VM does). Host may reject some pages, we need to |
---|
| 782 | + * check the return value and maybe submit a different page. The pages that are |
---|
| 783 | + * inflated/deflated are pointed by @b->page. |
---|
| 784 | + * |
---|
| 785 | + * Return: result as provided by the hypervisor. |
---|
| 786 | + */ |
---|
| 787 | +static unsigned long vmballoon_lock_op(struct vmballoon *b, |
---|
| 788 | + unsigned int num_pages, |
---|
| 789 | + enum vmballoon_page_size_type page_size, |
---|
| 790 | + enum vmballoon_op op) |
---|
| 791 | +{ |
---|
| 792 | + unsigned long cmd, pfn; |
---|
| 793 | + |
---|
| 794 | + lockdep_assert_held(&b->comm_lock); |
---|
| 795 | + |
---|
| 796 | + if (static_branch_likely(&vmw_balloon_batching)) { |
---|
| 797 | + if (op == VMW_BALLOON_INFLATE) |
---|
| 798 | + cmd = page_size == VMW_BALLOON_2M_PAGE ? |
---|
| 799 | + VMW_BALLOON_CMD_BATCHED_2M_LOCK : |
---|
| 800 | + VMW_BALLOON_CMD_BATCHED_LOCK; |
---|
| 801 | + else |
---|
| 802 | + cmd = page_size == VMW_BALLOON_2M_PAGE ? |
---|
| 803 | + VMW_BALLOON_CMD_BATCHED_2M_UNLOCK : |
---|
| 804 | + VMW_BALLOON_CMD_BATCHED_UNLOCK; |
---|
| 805 | + |
---|
| 806 | + pfn = PHYS_PFN(virt_to_phys(b->batch_page)); |
---|
| 807 | + } else { |
---|
| 808 | + cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK : |
---|
| 809 | + VMW_BALLOON_CMD_UNLOCK; |
---|
| 810 | + pfn = page_to_pfn(b->page); |
---|
| 811 | + |
---|
| 812 | + /* In non-batching mode, PFNs must fit in 32-bit */ |
---|
| 813 | + if (unlikely(pfn != (u32)pfn)) |
---|
| 814 | + return VMW_BALLOON_ERROR_PPN_INVALID; |
---|
| 815 | + } |
---|
| 816 | + |
---|
| 817 | + return vmballoon_cmd(b, cmd, pfn, num_pages); |
---|
| 818 | +} |
---|
| 819 | + |
---|
| 820 | +/** |
---|
| 821 | + * vmballoon_add_page - adds a page towards lock/unlock operation. |
---|
| 822 | + * |
---|
| 823 | + * @b: pointer to the balloon. |
---|
| 824 | + * @idx: index of the page to be ballooned in this batch. |
---|
| 825 | + * @p: pointer to the page that is about to be ballooned. |
---|
| 826 | + * |
---|
| 827 | + * Adds the page to be ballooned. Must be called while holding @comm_lock. |
---|
| 828 | + */ |
---|
| 829 | +static void vmballoon_add_page(struct vmballoon *b, unsigned int idx, |
---|
| 830 | + struct page *p) |
---|
| 831 | +{ |
---|
| 832 | + lockdep_assert_held(&b->comm_lock); |
---|
| 833 | + |
---|
| 834 | + if (static_branch_likely(&vmw_balloon_batching)) |
---|
| 835 | + b->batch_page[idx] = (struct vmballoon_batch_entry) |
---|
| 836 | + { .pfn = page_to_pfn(p) }; |
---|
440 | 837 | else |
---|
441 | | - status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages, |
---|
442 | | - *target); |
---|
| 838 | + b->page = p; |
---|
| 839 | +} |
---|
443 | 840 | |
---|
444 | | - if (vmballoon_check_status(b, status)) |
---|
| 841 | +/** |
---|
| 842 | + * vmballoon_lock - lock or unlock a batch of pages. |
---|
| 843 | + * |
---|
| 844 | + * @b: pointer to the balloon. |
---|
| 845 | + * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. |
---|
| 846 | + * |
---|
| 847 | + * Notifies the host of about ballooned pages (after inflation or deflation, |
---|
| 848 | + * according to @ctl). If the host rejects the page put it on the |
---|
| 849 | + * @ctl refuse list. These refused page are then released when moving to the |
---|
| 850 | + * next size of pages. |
---|
| 851 | + * |
---|
| 852 | + * Note that we neither free any @page here nor put them back on the ballooned |
---|
| 853 | + * pages list. Instead we queue it for later processing. We do that for several |
---|
| 854 | + * reasons. First, we do not want to free the page under the lock. Second, it |
---|
| 855 | + * allows us to unify the handling of lock and unlock. In the inflate case, the |
---|
| 856 | + * caller will check if there are too many refused pages and release them. |
---|
| 857 | + * Although it is not identical to the past behavior, it should not affect |
---|
| 858 | + * performance. |
---|
| 859 | + */ |
---|
| 860 | +static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl) |
---|
| 861 | +{ |
---|
| 862 | + unsigned long batch_status; |
---|
| 863 | + struct page *page; |
---|
| 864 | + unsigned int i, num_pages; |
---|
| 865 | + |
---|
| 866 | + num_pages = ctl->n_pages; |
---|
| 867 | + if (num_pages == 0) |
---|
445 | 868 | return 0; |
---|
446 | 869 | |
---|
447 | | - pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); |
---|
448 | | - STATS_INC(b->stats.lock_fail[is_2m_pages]); |
---|
449 | | - return 1; |
---|
450 | | -} |
---|
| 870 | + /* communication with the host is done under the communication lock */ |
---|
| 871 | + spin_lock(&b->comm_lock); |
---|
451 | 872 | |
---|
452 | | -/* |
---|
453 | | - * Notify the host that guest intends to release given page back into |
---|
454 | | - * the pool of available (to the guest) pages. |
---|
455 | | - */ |
---|
456 | | -static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn, |
---|
457 | | - unsigned int *target) |
---|
458 | | -{ |
---|
459 | | - unsigned long status, dummy = 0; |
---|
460 | | - u32 pfn32; |
---|
| 873 | + i = 0; |
---|
| 874 | + list_for_each_entry(page, &ctl->pages, lru) |
---|
| 875 | + vmballoon_add_page(b, i++, page); |
---|
461 | 876 | |
---|
462 | | - pfn32 = (u32)pfn; |
---|
463 | | - if (pfn32 != pfn) |
---|
464 | | - return false; |
---|
| 877 | + batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size, |
---|
| 878 | + ctl->op); |
---|
465 | 879 | |
---|
466 | | - STATS_INC(b->stats.unlock[false]); |
---|
| 880 | + /* |
---|
| 881 | + * Iterate over the pages in the provided list. Since we are changing |
---|
| 882 | + * @ctl->n_pages we are saving the original value in @num_pages and |
---|
| 883 | + * use this value to bound the loop. |
---|
| 884 | + */ |
---|
| 885 | + for (i = 0; i < num_pages; i++) { |
---|
| 886 | + unsigned long status; |
---|
467 | 887 | |
---|
468 | | - status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target); |
---|
469 | | - if (vmballoon_check_status(b, status)) |
---|
470 | | - return true; |
---|
471 | | - |
---|
472 | | - pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); |
---|
473 | | - STATS_INC(b->stats.unlock_fail[false]); |
---|
474 | | - return false; |
---|
475 | | -} |
---|
476 | | - |
---|
477 | | -static bool vmballoon_send_batched_unlock(struct vmballoon *b, |
---|
478 | | - unsigned int num_pages, bool is_2m_pages, unsigned int *target) |
---|
479 | | -{ |
---|
480 | | - unsigned long status; |
---|
481 | | - unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); |
---|
482 | | - |
---|
483 | | - STATS_INC(b->stats.unlock[is_2m_pages]); |
---|
484 | | - |
---|
485 | | - if (is_2m_pages) |
---|
486 | | - status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages, |
---|
487 | | - *target); |
---|
488 | | - else |
---|
489 | | - status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages, |
---|
490 | | - *target); |
---|
491 | | - |
---|
492 | | - if (vmballoon_check_status(b, status)) |
---|
493 | | - return true; |
---|
494 | | - |
---|
495 | | - pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); |
---|
496 | | - STATS_INC(b->stats.unlock_fail[is_2m_pages]); |
---|
497 | | - return false; |
---|
498 | | -} |
---|
499 | | - |
---|
500 | | -static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page) |
---|
501 | | -{ |
---|
502 | | - if (is_2m_page) |
---|
503 | | - return alloc_pages(flags, VMW_BALLOON_2M_SHIFT); |
---|
504 | | - |
---|
505 | | - return alloc_page(flags); |
---|
506 | | -} |
---|
507 | | - |
---|
508 | | -static void vmballoon_free_page(struct page *page, bool is_2m_page) |
---|
509 | | -{ |
---|
510 | | - if (is_2m_page) |
---|
511 | | - __free_pages(page, VMW_BALLOON_2M_SHIFT); |
---|
512 | | - else |
---|
513 | | - __free_page(page); |
---|
514 | | -} |
---|
515 | | - |
---|
516 | | -/* |
---|
517 | | - * Quickly release all pages allocated for the balloon. This function is |
---|
518 | | - * called when host decides to "reset" balloon for one reason or another. |
---|
519 | | - * Unlike normal "deflate" we do not (shall not) notify host of the pages |
---|
520 | | - * being released. |
---|
521 | | - */ |
---|
522 | | -static void vmballoon_pop(struct vmballoon *b) |
---|
523 | | -{ |
---|
524 | | - struct page *page, *next; |
---|
525 | | - unsigned is_2m_pages; |
---|
526 | | - |
---|
527 | | - for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; |
---|
528 | | - is_2m_pages++) { |
---|
529 | | - struct vmballoon_page_size *page_size = |
---|
530 | | - &b->page_sizes[is_2m_pages]; |
---|
531 | | - u16 size_per_page = vmballoon_page_size(is_2m_pages); |
---|
532 | | - |
---|
533 | | - list_for_each_entry_safe(page, next, &page_size->pages, lru) { |
---|
534 | | - list_del(&page->lru); |
---|
535 | | - vmballoon_free_page(page, is_2m_pages); |
---|
536 | | - STATS_INC(b->stats.free[is_2m_pages]); |
---|
537 | | - b->size -= size_per_page; |
---|
538 | | - cond_resched(); |
---|
539 | | - } |
---|
540 | | - } |
---|
541 | | - |
---|
542 | | - /* Clearing the batch_page unconditionally has no adverse effect */ |
---|
543 | | - free_page((unsigned long)b->batch_page); |
---|
544 | | - b->batch_page = NULL; |
---|
545 | | -} |
---|
546 | | - |
---|
547 | | -/* |
---|
548 | | - * Notify the host of a ballooned page. If host rejects the page put it on the |
---|
549 | | - * refuse list, those refused page are then released at the end of the |
---|
550 | | - * inflation cycle. |
---|
551 | | - */ |
---|
552 | | -static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, |
---|
553 | | - bool is_2m_pages, unsigned int *target) |
---|
554 | | -{ |
---|
555 | | - int locked, hv_status; |
---|
556 | | - struct page *page = b->page; |
---|
557 | | - struct vmballoon_page_size *page_size = &b->page_sizes[false]; |
---|
558 | | - |
---|
559 | | - /* is_2m_pages can never happen as 2m pages support implies batching */ |
---|
560 | | - |
---|
561 | | - locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status, |
---|
562 | | - target); |
---|
563 | | - if (locked) { |
---|
564 | | - STATS_INC(b->stats.refused_alloc[false]); |
---|
565 | | - |
---|
566 | | - if (locked == -EIO && |
---|
567 | | - (hv_status == VMW_BALLOON_ERROR_RESET || |
---|
568 | | - hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) { |
---|
569 | | - vmballoon_free_page(page, false); |
---|
570 | | - return -EIO; |
---|
571 | | - } |
---|
| 888 | + status = vmballoon_status_page(b, i, &page); |
---|
572 | 889 | |
---|
573 | 890 | /* |
---|
574 | | - * Place page on the list of non-balloonable pages |
---|
575 | | - * and retry allocation, unless we already accumulated |
---|
576 | | - * too many of them, in which case take a breather. |
---|
| 891 | + * Failure of the whole batch overrides a single operation |
---|
| 892 | + * results. |
---|
577 | 893 | */ |
---|
578 | | - if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) { |
---|
579 | | - page_size->n_refused_pages++; |
---|
580 | | - list_add(&page->lru, &page_size->refused_pages); |
---|
581 | | - } else { |
---|
582 | | - vmballoon_free_page(page, false); |
---|
583 | | - } |
---|
584 | | - return locked; |
---|
| 894 | + if (batch_status != VMW_BALLOON_SUCCESS) |
---|
| 895 | + status = batch_status; |
---|
| 896 | + |
---|
| 897 | + /* Continue if no error happened */ |
---|
| 898 | + if (!vmballoon_handle_one_result(b, page, ctl->page_size, |
---|
| 899 | + status)) |
---|
| 900 | + continue; |
---|
| 901 | + |
---|
| 902 | + /* |
---|
| 903 | + * Error happened. Move the pages to the refused list and update |
---|
| 904 | + * the pages number. |
---|
| 905 | + */ |
---|
| 906 | + list_move(&page->lru, &ctl->refused_pages); |
---|
| 907 | + ctl->n_pages--; |
---|
| 908 | + ctl->n_refused_pages++; |
---|
585 | 909 | } |
---|
586 | 910 | |
---|
587 | | - /* track allocated page */ |
---|
588 | | - list_add(&page->lru, &page_size->pages); |
---|
| 911 | + spin_unlock(&b->comm_lock); |
---|
589 | 912 | |
---|
590 | | - /* update balloon size */ |
---|
591 | | - b->size++; |
---|
592 | | - |
---|
593 | | - return 0; |
---|
| 913 | + return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO; |
---|
594 | 914 | } |
---|
595 | 915 | |
---|
596 | | -static int vmballoon_lock_batched_page(struct vmballoon *b, |
---|
597 | | - unsigned int num_pages, bool is_2m_pages, unsigned int *target) |
---|
598 | | -{ |
---|
599 | | - int locked, i; |
---|
600 | | - u16 size_per_page = vmballoon_page_size(is_2m_pages); |
---|
601 | | - |
---|
602 | | - locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages, |
---|
603 | | - target); |
---|
604 | | - if (locked > 0) { |
---|
605 | | - for (i = 0; i < num_pages; i++) { |
---|
606 | | - u64 pa = vmballoon_batch_get_pa(b->batch_page, i); |
---|
607 | | - struct page *p = pfn_to_page(pa >> PAGE_SHIFT); |
---|
608 | | - |
---|
609 | | - vmballoon_free_page(p, is_2m_pages); |
---|
610 | | - } |
---|
611 | | - |
---|
612 | | - return -EIO; |
---|
613 | | - } |
---|
614 | | - |
---|
615 | | - for (i = 0; i < num_pages; i++) { |
---|
616 | | - u64 pa = vmballoon_batch_get_pa(b->batch_page, i); |
---|
617 | | - struct page *p = pfn_to_page(pa >> PAGE_SHIFT); |
---|
618 | | - struct vmballoon_page_size *page_size = |
---|
619 | | - &b->page_sizes[is_2m_pages]; |
---|
620 | | - |
---|
621 | | - locked = vmballoon_batch_get_status(b->batch_page, i); |
---|
622 | | - |
---|
623 | | - switch (locked) { |
---|
624 | | - case VMW_BALLOON_SUCCESS: |
---|
625 | | - list_add(&p->lru, &page_size->pages); |
---|
626 | | - b->size += size_per_page; |
---|
627 | | - break; |
---|
628 | | - case VMW_BALLOON_ERROR_PPN_PINNED: |
---|
629 | | - case VMW_BALLOON_ERROR_PPN_INVALID: |
---|
630 | | - if (page_size->n_refused_pages |
---|
631 | | - < VMW_BALLOON_MAX_REFUSED) { |
---|
632 | | - list_add(&p->lru, &page_size->refused_pages); |
---|
633 | | - page_size->n_refused_pages++; |
---|
634 | | - break; |
---|
635 | | - } |
---|
636 | | - /* Fallthrough */ |
---|
637 | | - case VMW_BALLOON_ERROR_RESET: |
---|
638 | | - case VMW_BALLOON_ERROR_PPN_NOTNEEDED: |
---|
639 | | - vmballoon_free_page(p, is_2m_pages); |
---|
640 | | - break; |
---|
641 | | - default: |
---|
642 | | - /* This should never happen */ |
---|
643 | | - WARN_ON_ONCE(true); |
---|
644 | | - } |
---|
645 | | - } |
---|
646 | | - |
---|
647 | | - return 0; |
---|
648 | | -} |
---|
649 | | - |
---|
650 | | -/* |
---|
651 | | - * Release the page allocated for the balloon. Note that we first notify |
---|
652 | | - * the host so it can make sure the page will be available for the guest |
---|
653 | | - * to use, if needed. |
---|
| 916 | +/** |
---|
| 917 | + * vmballoon_release_page_list() - Releases a page list |
---|
| 918 | + * |
---|
| 919 | + * @page_list: list of pages to release. |
---|
| 920 | + * @n_pages: pointer to the number of pages. |
---|
| 921 | + * @page_size: whether the pages in the list are 2MB (or else 4KB). |
---|
| 922 | + * |
---|
| 923 | + * Releases the list of pages and zeros the number of pages. |
---|
654 | 924 | */ |
---|
655 | | -static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages, |
---|
656 | | - bool is_2m_pages, unsigned int *target) |
---|
| 925 | +static void vmballoon_release_page_list(struct list_head *page_list, |
---|
| 926 | + int *n_pages, |
---|
| 927 | + enum vmballoon_page_size_type page_size) |
---|
657 | 928 | { |
---|
658 | | - struct page *page = b->page; |
---|
659 | | - struct vmballoon_page_size *page_size = &b->page_sizes[false]; |
---|
| 929 | + struct page *page, *tmp; |
---|
660 | 930 | |
---|
661 | | - /* is_2m_pages can never happen as 2m pages support implies batching */ |
---|
662 | | - |
---|
663 | | - if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) { |
---|
664 | | - list_add(&page->lru, &page_size->pages); |
---|
665 | | - return -EIO; |
---|
| 931 | + list_for_each_entry_safe(page, tmp, page_list, lru) { |
---|
| 932 | + list_del(&page->lru); |
---|
| 933 | + __free_pages(page, vmballoon_page_order(page_size)); |
---|
666 | 934 | } |
---|
667 | 935 | |
---|
668 | | - /* deallocate page */ |
---|
669 | | - vmballoon_free_page(page, false); |
---|
670 | | - STATS_INC(b->stats.free[false]); |
---|
671 | | - |
---|
672 | | - /* update balloon size */ |
---|
673 | | - b->size--; |
---|
674 | | - |
---|
675 | | - return 0; |
---|
| 936 | + if (n_pages) |
---|
| 937 | + *n_pages = 0; |
---|
676 | 938 | } |
---|
677 | 939 | |
---|
678 | | -static int vmballoon_unlock_batched_page(struct vmballoon *b, |
---|
679 | | - unsigned int num_pages, bool is_2m_pages, |
---|
680 | | - unsigned int *target) |
---|
681 | | -{ |
---|
682 | | - int locked, i, ret = 0; |
---|
683 | | - bool hv_success; |
---|
684 | | - u16 size_per_page = vmballoon_page_size(is_2m_pages); |
---|
685 | | - |
---|
686 | | - hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages, |
---|
687 | | - target); |
---|
688 | | - if (!hv_success) |
---|
689 | | - ret = -EIO; |
---|
690 | | - |
---|
691 | | - for (i = 0; i < num_pages; i++) { |
---|
692 | | - u64 pa = vmballoon_batch_get_pa(b->batch_page, i); |
---|
693 | | - struct page *p = pfn_to_page(pa >> PAGE_SHIFT); |
---|
694 | | - struct vmballoon_page_size *page_size = |
---|
695 | | - &b->page_sizes[is_2m_pages]; |
---|
696 | | - |
---|
697 | | - locked = vmballoon_batch_get_status(b->batch_page, i); |
---|
698 | | - if (!hv_success || locked != VMW_BALLOON_SUCCESS) { |
---|
699 | | - /* |
---|
700 | | - * That page wasn't successfully unlocked by the |
---|
701 | | - * hypervisor, re-add it to the list of pages owned by |
---|
702 | | - * the balloon driver. |
---|
703 | | - */ |
---|
704 | | - list_add(&p->lru, &page_size->pages); |
---|
705 | | - } else { |
---|
706 | | - /* deallocate page */ |
---|
707 | | - vmballoon_free_page(p, is_2m_pages); |
---|
708 | | - STATS_INC(b->stats.free[is_2m_pages]); |
---|
709 | | - |
---|
710 | | - /* update balloon size */ |
---|
711 | | - b->size -= size_per_page; |
---|
712 | | - } |
---|
713 | | - } |
---|
714 | | - |
---|
715 | | - return ret; |
---|
716 | | -} |
---|
717 | 940 | |
---|
718 | 941 | /* |
---|
719 | 942 | * Release pages that were allocated while attempting to inflate the |
---|
720 | 943 | * balloon but were refused by the host for one reason or another. |
---|
721 | 944 | */ |
---|
722 | 945 | static void vmballoon_release_refused_pages(struct vmballoon *b, |
---|
723 | | - bool is_2m_pages) |
---|
| 946 | + struct vmballoon_ctl *ctl) |
---|
724 | 947 | { |
---|
725 | | - struct page *page, *next; |
---|
726 | | - struct vmballoon_page_size *page_size = |
---|
727 | | - &b->page_sizes[is_2m_pages]; |
---|
| 948 | + vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE, |
---|
| 949 | + ctl->page_size); |
---|
728 | 950 | |
---|
729 | | - list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) { |
---|
730 | | - list_del(&page->lru); |
---|
731 | | - vmballoon_free_page(page, is_2m_pages); |
---|
732 | | - STATS_INC(b->stats.refused_free[is_2m_pages]); |
---|
| 951 | + vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages, |
---|
| 952 | + ctl->page_size); |
---|
| 953 | +} |
---|
| 954 | + |
---|
| 955 | +/** |
---|
| 956 | + * vmballoon_change - retrieve the required balloon change |
---|
| 957 | + * |
---|
| 958 | + * @b: pointer for the balloon. |
---|
| 959 | + * |
---|
| 960 | + * Return: the required change for the balloon size. A positive number |
---|
| 961 | + * indicates inflation, a negative number indicates a deflation. |
---|
| 962 | + */ |
---|
| 963 | +static int64_t vmballoon_change(struct vmballoon *b) |
---|
| 964 | +{ |
---|
| 965 | + int64_t size, target; |
---|
| 966 | + |
---|
| 967 | + size = atomic64_read(&b->size); |
---|
| 968 | + target = READ_ONCE(b->target); |
---|
| 969 | + |
---|
| 970 | + /* |
---|
| 971 | + * We must cast first because of int sizes |
---|
| 972 | + * Otherwise we might get huge positives instead of negatives |
---|
| 973 | + */ |
---|
| 974 | + |
---|
| 975 | + if (b->reset_required) |
---|
| 976 | + return 0; |
---|
| 977 | + |
---|
| 978 | + /* consider a 2MB slack on deflate, unless the balloon is emptied */ |
---|
| 979 | + if (target < size && target != 0 && |
---|
| 980 | + size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)) |
---|
| 981 | + return 0; |
---|
| 982 | + |
---|
| 983 | + /* If an out-of-memory recently occurred, inflation is disallowed. */ |
---|
| 984 | + if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout))) |
---|
| 985 | + return 0; |
---|
| 986 | + |
---|
| 987 | + return target - size; |
---|
| 988 | +} |
---|
| 989 | + |
---|
| 990 | +/** |
---|
| 991 | + * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation. |
---|
| 992 | + * |
---|
| 993 | + * @b: pointer to balloon. |
---|
| 994 | + * @pages: list of pages to enqueue. |
---|
| 995 | + * @n_pages: pointer to number of pages in list. The value is zeroed. |
---|
| 996 | + * @page_size: whether the pages are 2MB or 4KB pages. |
---|
| 997 | + * |
---|
| 998 | + * Enqueues the provides list of pages in the ballooned page list, clears the |
---|
| 999 | + * list and zeroes the number of pages that was provided. |
---|
| 1000 | + */ |
---|
| 1001 | +static void vmballoon_enqueue_page_list(struct vmballoon *b, |
---|
| 1002 | + struct list_head *pages, |
---|
| 1003 | + unsigned int *n_pages, |
---|
| 1004 | + enum vmballoon_page_size_type page_size) |
---|
| 1005 | +{ |
---|
| 1006 | + unsigned long flags; |
---|
| 1007 | + struct page *page; |
---|
| 1008 | + |
---|
| 1009 | + if (page_size == VMW_BALLOON_4K_PAGE) { |
---|
| 1010 | + balloon_page_list_enqueue(&b->b_dev_info, pages); |
---|
| 1011 | + } else { |
---|
| 1012 | + /* |
---|
| 1013 | + * Keep the huge pages in a local list which is not available |
---|
| 1014 | + * for the balloon compaction mechanism. |
---|
| 1015 | + */ |
---|
| 1016 | + spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); |
---|
| 1017 | + |
---|
| 1018 | + list_for_each_entry(page, pages, lru) { |
---|
| 1019 | + vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE); |
---|
| 1020 | + } |
---|
| 1021 | + |
---|
| 1022 | + list_splice_init(pages, &b->huge_pages); |
---|
| 1023 | + __count_vm_events(BALLOON_INFLATE, *n_pages * |
---|
| 1024 | + vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)); |
---|
| 1025 | + spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags); |
---|
733 | 1026 | } |
---|
734 | 1027 | |
---|
735 | | - page_size->n_refused_pages = 0; |
---|
| 1028 | + *n_pages = 0; |
---|
736 | 1029 | } |
---|
737 | 1030 | |
---|
738 | | -static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p) |
---|
| 1031 | +/** |
---|
| 1032 | + * vmballoon_dequeue_page_list() - Dequeues page lists for deflation. |
---|
| 1033 | + * |
---|
| 1034 | + * @b: pointer to balloon. |
---|
| 1035 | + * @pages: list of pages to enqueue. |
---|
| 1036 | + * @n_pages: pointer to number of pages in list. The value is zeroed. |
---|
| 1037 | + * @page_size: whether the pages are 2MB or 4KB pages. |
---|
| 1038 | + * @n_req_pages: the number of requested pages. |
---|
| 1039 | + * |
---|
| 1040 | + * Dequeues the number of requested pages from the balloon for deflation. The |
---|
| 1041 | + * number of dequeued pages may be lower, if not enough pages in the requested |
---|
| 1042 | + * size are available. |
---|
| 1043 | + */ |
---|
| 1044 | +static void vmballoon_dequeue_page_list(struct vmballoon *b, |
---|
| 1045 | + struct list_head *pages, |
---|
| 1046 | + unsigned int *n_pages, |
---|
| 1047 | + enum vmballoon_page_size_type page_size, |
---|
| 1048 | + unsigned int n_req_pages) |
---|
739 | 1049 | { |
---|
740 | | - b->page = p; |
---|
| 1050 | + struct page *page, *tmp; |
---|
| 1051 | + unsigned int i = 0; |
---|
| 1052 | + unsigned long flags; |
---|
| 1053 | + |
---|
| 1054 | + /* In the case of 4k pages, use the compaction infrastructure */ |
---|
| 1055 | + if (page_size == VMW_BALLOON_4K_PAGE) { |
---|
| 1056 | + *n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages, |
---|
| 1057 | + n_req_pages); |
---|
| 1058 | + return; |
---|
| 1059 | + } |
---|
| 1060 | + |
---|
| 1061 | + /* 2MB pages */ |
---|
| 1062 | + spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); |
---|
| 1063 | + list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) { |
---|
| 1064 | + vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE); |
---|
| 1065 | + |
---|
| 1066 | + list_move(&page->lru, pages); |
---|
| 1067 | + if (++i == n_req_pages) |
---|
| 1068 | + break; |
---|
| 1069 | + } |
---|
| 1070 | + |
---|
| 1071 | + __count_vm_events(BALLOON_DEFLATE, |
---|
| 1072 | + i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)); |
---|
| 1073 | + spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags); |
---|
| 1074 | + *n_pages = i; |
---|
741 | 1075 | } |
---|
742 | 1076 | |
---|
743 | | -static void vmballoon_add_batched_page(struct vmballoon *b, int idx, |
---|
744 | | - struct page *p) |
---|
| 1077 | +/** |
---|
| 1078 | + * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k. |
---|
| 1079 | + * |
---|
| 1080 | + * If inflation of 2MB pages was denied by the hypervisor, it is likely to be |
---|
| 1081 | + * due to one or few 4KB pages. These 2MB pages may keep being allocated and |
---|
| 1082 | + * then being refused. To prevent this case, this function splits the refused |
---|
| 1083 | + * pages into 4KB pages and adds them into @prealloc_pages list. |
---|
| 1084 | + * |
---|
| 1085 | + * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. |
---|
| 1086 | + */ |
---|
| 1087 | +static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl) |
---|
745 | 1088 | { |
---|
746 | | - vmballoon_batch_set_pa(b->batch_page, idx, |
---|
747 | | - (u64)page_to_pfn(p) << PAGE_SHIFT); |
---|
| 1089 | + struct page *page, *tmp; |
---|
| 1090 | + unsigned int i, order; |
---|
| 1091 | + |
---|
| 1092 | + order = vmballoon_page_order(ctl->page_size); |
---|
| 1093 | + |
---|
| 1094 | + list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) { |
---|
| 1095 | + list_del(&page->lru); |
---|
| 1096 | + split_page(page, order); |
---|
| 1097 | + for (i = 0; i < (1 << order); i++) |
---|
| 1098 | + list_add(&page[i].lru, &ctl->prealloc_pages); |
---|
| 1099 | + } |
---|
| 1100 | + ctl->n_refused_pages = 0; |
---|
748 | 1101 | } |
---|
749 | 1102 | |
---|
750 | | -/* |
---|
751 | | - * Inflate the balloon towards its target size. Note that we try to limit |
---|
752 | | - * the rate of allocation to make sure we are not choking the rest of the |
---|
753 | | - * system. |
---|
| 1103 | +/** |
---|
| 1104 | + * vmballoon_inflate() - Inflate the balloon towards its target size. |
---|
| 1105 | + * |
---|
| 1106 | + * @b: pointer to the balloon. |
---|
754 | 1107 | */ |
---|
755 | 1108 | static void vmballoon_inflate(struct vmballoon *b) |
---|
756 | 1109 | { |
---|
757 | | - unsigned int num_pages = 0; |
---|
758 | | - int error = 0; |
---|
759 | | - gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP; |
---|
760 | | - bool is_2m_pages; |
---|
| 1110 | + int64_t to_inflate_frames; |
---|
| 1111 | + struct vmballoon_ctl ctl = { |
---|
| 1112 | + .pages = LIST_HEAD_INIT(ctl.pages), |
---|
| 1113 | + .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), |
---|
| 1114 | + .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages), |
---|
| 1115 | + .page_size = b->max_page_size, |
---|
| 1116 | + .op = VMW_BALLOON_INFLATE |
---|
| 1117 | + }; |
---|
761 | 1118 | |
---|
762 | | - pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); |
---|
| 1119 | + while ((to_inflate_frames = vmballoon_change(b)) > 0) { |
---|
| 1120 | + unsigned int to_inflate_pages, page_in_frames; |
---|
| 1121 | + int alloc_error, lock_error = 0; |
---|
763 | 1122 | |
---|
764 | | - /* |
---|
765 | | - * First try NOSLEEP page allocations to inflate balloon. |
---|
766 | | - * |
---|
767 | | - * If we do not throttle nosleep allocations, we can drain all |
---|
768 | | - * free pages in the guest quickly (if the balloon target is high). |
---|
769 | | - * As a side-effect, draining free pages helps to inform (force) |
---|
770 | | - * the guest to start swapping if balloon target is not met yet, |
---|
771 | | - * which is a desired behavior. However, balloon driver can consume |
---|
772 | | - * all available CPU cycles if too many pages are allocated in a |
---|
773 | | - * second. Therefore, we throttle nosleep allocations even when |
---|
774 | | - * the guest is not under memory pressure. OTOH, if we have already |
---|
775 | | - * predicted that the guest is under memory pressure, then we |
---|
776 | | - * slowdown page allocations considerably. |
---|
777 | | - */ |
---|
| 1123 | + VM_BUG_ON(!list_empty(&ctl.pages)); |
---|
| 1124 | + VM_BUG_ON(ctl.n_pages != 0); |
---|
778 | 1125 | |
---|
779 | | - /* |
---|
780 | | - * Start with no sleep allocation rate which may be higher |
---|
781 | | - * than sleeping allocation rate. |
---|
782 | | - */ |
---|
783 | | - is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES; |
---|
| 1126 | + page_in_frames = vmballoon_page_in_frames(ctl.page_size); |
---|
784 | 1127 | |
---|
785 | | - pr_debug("%s - goal: %d", __func__, b->target - b->size); |
---|
| 1128 | + to_inflate_pages = min_t(unsigned long, b->batch_max_pages, |
---|
| 1129 | + DIV_ROUND_UP_ULL(to_inflate_frames, |
---|
| 1130 | + page_in_frames)); |
---|
786 | 1131 | |
---|
787 | | - while (!b->reset_required && |
---|
788 | | - b->size + num_pages * vmballoon_page_size(is_2m_pages) |
---|
789 | | - < b->target) { |
---|
790 | | - struct page *page; |
---|
| 1132 | + /* Start by allocating */ |
---|
| 1133 | + alloc_error = vmballoon_alloc_page_list(b, &ctl, |
---|
| 1134 | + to_inflate_pages); |
---|
791 | 1135 | |
---|
792 | | - if (flags == VMW_PAGE_ALLOC_NOSLEEP) |
---|
793 | | - STATS_INC(b->stats.alloc[is_2m_pages]); |
---|
794 | | - else |
---|
795 | | - STATS_INC(b->stats.sleep_alloc); |
---|
| 1136 | + /* Actually lock the pages by telling the hypervisor */ |
---|
| 1137 | + lock_error = vmballoon_lock(b, &ctl); |
---|
796 | 1138 | |
---|
797 | | - page = vmballoon_alloc_page(flags, is_2m_pages); |
---|
798 | | - if (!page) { |
---|
799 | | - STATS_INC(b->stats.alloc_fail[is_2m_pages]); |
---|
| 1139 | + /* |
---|
| 1140 | + * If an error indicates that something serious went wrong, |
---|
| 1141 | + * stop the inflation. |
---|
| 1142 | + */ |
---|
| 1143 | + if (lock_error) |
---|
| 1144 | + break; |
---|
800 | 1145 | |
---|
801 | | - if (is_2m_pages) { |
---|
802 | | - b->ops->lock(b, num_pages, true, &b->target); |
---|
| 1146 | + /* Update the balloon size */ |
---|
| 1147 | + atomic64_add(ctl.n_pages * page_in_frames, &b->size); |
---|
803 | 1148 | |
---|
804 | | - /* |
---|
805 | | - * ignore errors from locking as we now switch |
---|
806 | | - * to 4k pages and we might get different |
---|
807 | | - * errors. |
---|
808 | | - */ |
---|
| 1149 | + vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages, |
---|
| 1150 | + ctl.page_size); |
---|
809 | 1151 | |
---|
810 | | - num_pages = 0; |
---|
811 | | - is_2m_pages = false; |
---|
812 | | - continue; |
---|
813 | | - } |
---|
814 | | - |
---|
815 | | - if (flags == VMW_PAGE_ALLOC_CANSLEEP) { |
---|
816 | | - /* |
---|
817 | | - * CANSLEEP page allocation failed, so guest |
---|
818 | | - * is under severe memory pressure. We just log |
---|
819 | | - * the event, but do not stop the inflation |
---|
820 | | - * due to its negative impact on performance. |
---|
821 | | - */ |
---|
822 | | - STATS_INC(b->stats.sleep_alloc_fail); |
---|
| 1152 | + /* |
---|
| 1153 | + * If allocation failed or the number of refused pages exceeds |
---|
| 1154 | + * the maximum allowed, move to the next page size. |
---|
| 1155 | + */ |
---|
| 1156 | + if (alloc_error || |
---|
| 1157 | + ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) { |
---|
| 1158 | + if (ctl.page_size == VMW_BALLOON_4K_PAGE) |
---|
823 | 1159 | break; |
---|
824 | | - } |
---|
825 | 1160 | |
---|
826 | 1161 | /* |
---|
827 | | - * NOSLEEP page allocation failed, so the guest is |
---|
828 | | - * under memory pressure. Slowing down page alloctions |
---|
829 | | - * seems to be reasonable, but doing so might actually |
---|
830 | | - * cause the hypervisor to throttle us down, resulting |
---|
831 | | - * in degraded performance. We will count on the |
---|
832 | | - * scheduler and standard memory management mechanisms |
---|
833 | | - * for now. |
---|
| 1162 | + * Split the refused pages to 4k. This will also empty |
---|
| 1163 | + * the refused pages list. |
---|
834 | 1164 | */ |
---|
835 | | - flags = VMW_PAGE_ALLOC_CANSLEEP; |
---|
836 | | - continue; |
---|
837 | | - } |
---|
838 | | - |
---|
839 | | - b->ops->add_page(b, num_pages++, page); |
---|
840 | | - if (num_pages == b->batch_max_pages) { |
---|
841 | | - error = b->ops->lock(b, num_pages, is_2m_pages, |
---|
842 | | - &b->target); |
---|
843 | | - num_pages = 0; |
---|
844 | | - if (error) |
---|
845 | | - break; |
---|
| 1165 | + vmballoon_split_refused_pages(&ctl); |
---|
| 1166 | + ctl.page_size--; |
---|
846 | 1167 | } |
---|
847 | 1168 | |
---|
848 | 1169 | cond_resched(); |
---|
849 | 1170 | } |
---|
850 | 1171 | |
---|
851 | | - if (num_pages > 0) |
---|
852 | | - b->ops->lock(b, num_pages, is_2m_pages, &b->target); |
---|
| 1172 | + /* |
---|
| 1173 | + * Release pages that were allocated while attempting to inflate the |
---|
| 1174 | + * balloon but were refused by the host for one reason or another, |
---|
| 1175 | + * and update the statistics. |
---|
| 1176 | + */ |
---|
| 1177 | + if (ctl.n_refused_pages != 0) |
---|
| 1178 | + vmballoon_release_refused_pages(b, &ctl); |
---|
853 | 1179 | |
---|
854 | | - vmballoon_release_refused_pages(b, true); |
---|
855 | | - vmballoon_release_refused_pages(b, false); |
---|
| 1180 | + vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size); |
---|
856 | 1181 | } |
---|
857 | 1182 | |
---|
858 | | -/* |
---|
| 1183 | +/** |
---|
| 1184 | + * vmballoon_deflate() - Decrease the size of the balloon. |
---|
| 1185 | + * |
---|
| 1186 | + * @b: pointer to the balloon |
---|
| 1187 | + * @n_frames: the number of frames to deflate. If zero, automatically |
---|
| 1188 | + * calculated according to the target size. |
---|
| 1189 | + * @coordinated: whether to coordinate with the host |
---|
| 1190 | + * |
---|
859 | 1191 | * Decrease the size of the balloon allowing guest to use more memory. |
---|
| 1192 | + * |
---|
| 1193 | + * Return: The number of deflated frames (i.e., basic page size units) |
---|
860 | 1194 | */ |
---|
861 | | -static void vmballoon_deflate(struct vmballoon *b) |
---|
| 1195 | +static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames, |
---|
| 1196 | + bool coordinated) |
---|
862 | 1197 | { |
---|
863 | | - unsigned is_2m_pages; |
---|
864 | | - |
---|
865 | | - pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); |
---|
| 1198 | + unsigned long deflated_frames = 0; |
---|
| 1199 | + unsigned long tried_frames = 0; |
---|
| 1200 | + struct vmballoon_ctl ctl = { |
---|
| 1201 | + .pages = LIST_HEAD_INIT(ctl.pages), |
---|
| 1202 | + .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), |
---|
| 1203 | + .page_size = VMW_BALLOON_4K_PAGE, |
---|
| 1204 | + .op = VMW_BALLOON_DEFLATE |
---|
| 1205 | + }; |
---|
866 | 1206 | |
---|
867 | 1207 | /* free pages to reach target */ |
---|
868 | | - for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes; |
---|
869 | | - is_2m_pages++) { |
---|
870 | | - struct page *page, *next; |
---|
871 | | - unsigned int num_pages = 0; |
---|
872 | | - struct vmballoon_page_size *page_size = |
---|
873 | | - &b->page_sizes[is_2m_pages]; |
---|
| 1208 | + while (true) { |
---|
| 1209 | + unsigned int to_deflate_pages, n_unlocked_frames; |
---|
| 1210 | + unsigned int page_in_frames; |
---|
| 1211 | + int64_t to_deflate_frames; |
---|
| 1212 | + bool deflated_all; |
---|
874 | 1213 | |
---|
875 | | - list_for_each_entry_safe(page, next, &page_size->pages, lru) { |
---|
876 | | - if (b->reset_required || |
---|
877 | | - (b->target > 0 && |
---|
878 | | - b->size - num_pages |
---|
879 | | - * vmballoon_page_size(is_2m_pages) |
---|
880 | | - < b->target + vmballoon_page_size(true))) |
---|
| 1214 | + page_in_frames = vmballoon_page_in_frames(ctl.page_size); |
---|
| 1215 | + |
---|
| 1216 | + VM_BUG_ON(!list_empty(&ctl.pages)); |
---|
| 1217 | + VM_BUG_ON(ctl.n_pages); |
---|
| 1218 | + VM_BUG_ON(!list_empty(&ctl.refused_pages)); |
---|
| 1219 | + VM_BUG_ON(ctl.n_refused_pages); |
---|
| 1220 | + |
---|
| 1221 | + /* |
---|
| 1222 | + * If we were requested a specific number of frames, we try to |
---|
| 1223 | + * deflate this number of frames. Otherwise, deflation is |
---|
| 1224 | + * performed according to the target and balloon size. |
---|
| 1225 | + */ |
---|
| 1226 | + to_deflate_frames = n_frames ? n_frames - tried_frames : |
---|
| 1227 | + -vmballoon_change(b); |
---|
| 1228 | + |
---|
| 1229 | + /* break if no work to do */ |
---|
| 1230 | + if (to_deflate_frames <= 0) |
---|
| 1231 | + break; |
---|
| 1232 | + |
---|
| 1233 | + /* |
---|
| 1234 | + * Calculate the number of frames based on current page size, |
---|
| 1235 | + * but limit the deflated frames to a single chunk |
---|
| 1236 | + */ |
---|
| 1237 | + to_deflate_pages = min_t(unsigned long, b->batch_max_pages, |
---|
| 1238 | + DIV_ROUND_UP_ULL(to_deflate_frames, |
---|
| 1239 | + page_in_frames)); |
---|
| 1240 | + |
---|
| 1241 | + /* First take the pages from the balloon pages. */ |
---|
| 1242 | + vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages, |
---|
| 1243 | + ctl.page_size, to_deflate_pages); |
---|
| 1244 | + |
---|
| 1245 | + /* |
---|
| 1246 | + * Before pages are moving to the refused list, count their |
---|
| 1247 | + * frames as frames that we tried to deflate. |
---|
| 1248 | + */ |
---|
| 1249 | + tried_frames += ctl.n_pages * page_in_frames; |
---|
| 1250 | + |
---|
| 1251 | + /* |
---|
| 1252 | + * Unlock the pages by communicating with the hypervisor if the |
---|
| 1253 | + * communication is coordinated (i.e., not pop). We ignore the |
---|
| 1254 | + * return code. Instead we check if all the pages we manage to |
---|
| 1255 | + * unlock all the pages. If we failed, we will move to the next |
---|
| 1256 | + * page size, and would eventually try again later. |
---|
| 1257 | + */ |
---|
| 1258 | + if (coordinated) |
---|
| 1259 | + vmballoon_lock(b, &ctl); |
---|
| 1260 | + |
---|
| 1261 | + /* |
---|
| 1262 | + * Check if we deflated enough. We will move to the next page |
---|
| 1263 | + * size if we did not manage to do so. This calculation takes |
---|
| 1264 | + * place now, as once the pages are released, the number of |
---|
| 1265 | + * pages is zeroed. |
---|
| 1266 | + */ |
---|
| 1267 | + deflated_all = (ctl.n_pages == to_deflate_pages); |
---|
| 1268 | + |
---|
| 1269 | + /* Update local and global counters */ |
---|
| 1270 | + n_unlocked_frames = ctl.n_pages * page_in_frames; |
---|
| 1271 | + atomic64_sub(n_unlocked_frames, &b->size); |
---|
| 1272 | + deflated_frames += n_unlocked_frames; |
---|
| 1273 | + |
---|
| 1274 | + vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE, |
---|
| 1275 | + ctl.page_size, ctl.n_pages); |
---|
| 1276 | + |
---|
| 1277 | + /* free the ballooned pages */ |
---|
| 1278 | + vmballoon_release_page_list(&ctl.pages, &ctl.n_pages, |
---|
| 1279 | + ctl.page_size); |
---|
| 1280 | + |
---|
| 1281 | + /* Return the refused pages to the ballooned list. */ |
---|
| 1282 | + vmballoon_enqueue_page_list(b, &ctl.refused_pages, |
---|
| 1283 | + &ctl.n_refused_pages, |
---|
| 1284 | + ctl.page_size); |
---|
| 1285 | + |
---|
| 1286 | + /* If we failed to unlock all the pages, move to next size. */ |
---|
| 1287 | + if (!deflated_all) { |
---|
| 1288 | + if (ctl.page_size == b->max_page_size) |
---|
881 | 1289 | break; |
---|
882 | | - |
---|
883 | | - list_del(&page->lru); |
---|
884 | | - b->ops->add_page(b, num_pages++, page); |
---|
885 | | - |
---|
886 | | - if (num_pages == b->batch_max_pages) { |
---|
887 | | - int error; |
---|
888 | | - |
---|
889 | | - error = b->ops->unlock(b, num_pages, |
---|
890 | | - is_2m_pages, &b->target); |
---|
891 | | - num_pages = 0; |
---|
892 | | - if (error) |
---|
893 | | - return; |
---|
894 | | - } |
---|
895 | | - |
---|
896 | | - cond_resched(); |
---|
| 1290 | + ctl.page_size++; |
---|
897 | 1291 | } |
---|
898 | 1292 | |
---|
899 | | - if (num_pages > 0) |
---|
900 | | - b->ops->unlock(b, num_pages, is_2m_pages, &b->target); |
---|
| 1293 | + cond_resched(); |
---|
901 | 1294 | } |
---|
| 1295 | + |
---|
| 1296 | + return deflated_frames; |
---|
902 | 1297 | } |
---|
903 | 1298 | |
---|
904 | | -static const struct vmballoon_ops vmballoon_basic_ops = { |
---|
905 | | - .add_page = vmballoon_add_page, |
---|
906 | | - .lock = vmballoon_lock_page, |
---|
907 | | - .unlock = vmballoon_unlock_page |
---|
908 | | -}; |
---|
| 1299 | +/** |
---|
| 1300 | + * vmballoon_deinit_batching - disables batching mode. |
---|
| 1301 | + * |
---|
| 1302 | + * @b: pointer to &struct vmballoon. |
---|
| 1303 | + * |
---|
| 1304 | + * Disables batching, by deallocating the page for communication with the |
---|
| 1305 | + * hypervisor and disabling the static key to indicate that batching is off. |
---|
| 1306 | + */ |
---|
| 1307 | +static void vmballoon_deinit_batching(struct vmballoon *b) |
---|
| 1308 | +{ |
---|
| 1309 | + free_page((unsigned long)b->batch_page); |
---|
| 1310 | + b->batch_page = NULL; |
---|
| 1311 | + static_branch_disable(&vmw_balloon_batching); |
---|
| 1312 | + b->batch_max_pages = 1; |
---|
| 1313 | +} |
---|
909 | 1314 | |
---|
910 | | -static const struct vmballoon_ops vmballoon_batched_ops = { |
---|
911 | | - .add_page = vmballoon_add_batched_page, |
---|
912 | | - .lock = vmballoon_lock_batched_page, |
---|
913 | | - .unlock = vmballoon_unlock_batched_page |
---|
914 | | -}; |
---|
915 | | - |
---|
916 | | -static bool vmballoon_init_batching(struct vmballoon *b) |
---|
| 1315 | +/** |
---|
| 1316 | + * vmballoon_init_batching - enable batching mode. |
---|
| 1317 | + * |
---|
| 1318 | + * @b: pointer to &struct vmballoon. |
---|
| 1319 | + * |
---|
| 1320 | + * Enables batching, by allocating a page for communication with the hypervisor |
---|
| 1321 | + * and enabling the static_key to use batching. |
---|
| 1322 | + * |
---|
| 1323 | + * Return: zero on success or an appropriate error-code. |
---|
| 1324 | + */ |
---|
| 1325 | +static int vmballoon_init_batching(struct vmballoon *b) |
---|
917 | 1326 | { |
---|
918 | 1327 | struct page *page; |
---|
919 | 1328 | |
---|
920 | 1329 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
---|
921 | 1330 | if (!page) |
---|
922 | | - return false; |
---|
| 1331 | + return -ENOMEM; |
---|
923 | 1332 | |
---|
924 | 1333 | b->batch_page = page_address(page); |
---|
925 | | - return true; |
---|
| 1334 | + b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry); |
---|
| 1335 | + |
---|
| 1336 | + static_branch_enable(&vmw_balloon_batching); |
---|
| 1337 | + |
---|
| 1338 | + return 0; |
---|
926 | 1339 | } |
---|
927 | 1340 | |
---|
928 | 1341 | /* |
---|
.. | .. |
---|
932 | 1345 | { |
---|
933 | 1346 | struct vmballoon *b = client_data; |
---|
934 | 1347 | |
---|
935 | | - STATS_INC(b->stats.doorbell); |
---|
| 1348 | + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL); |
---|
936 | 1349 | |
---|
937 | 1350 | mod_delayed_work(system_freezable_wq, &b->dwork, 0); |
---|
938 | 1351 | } |
---|
.. | .. |
---|
942 | 1355 | */ |
---|
943 | 1356 | static void vmballoon_vmci_cleanup(struct vmballoon *b) |
---|
944 | 1357 | { |
---|
945 | | - int error; |
---|
946 | | - |
---|
947 | | - VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID, |
---|
948 | | - VMCI_INVALID_ID, error); |
---|
949 | | - STATS_INC(b->stats.doorbell_unset); |
---|
| 1358 | + vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, |
---|
| 1359 | + VMCI_INVALID_ID, VMCI_INVALID_ID); |
---|
950 | 1360 | |
---|
951 | 1361 | if (!vmci_handle_is_invalid(b->vmci_doorbell)) { |
---|
952 | 1362 | vmci_doorbell_destroy(b->vmci_doorbell); |
---|
.. | .. |
---|
954 | 1364 | } |
---|
955 | 1365 | } |
---|
956 | 1366 | |
---|
957 | | -/* |
---|
958 | | - * Initialize vmci doorbell, to get notified as soon as balloon changes |
---|
| 1367 | +/** |
---|
| 1368 | + * vmballoon_vmci_init - Initialize vmci doorbell. |
---|
| 1369 | + * |
---|
| 1370 | + * @b: pointer to the balloon. |
---|
| 1371 | + * |
---|
| 1372 | + * Return: zero on success or when wakeup command not supported. Error-code |
---|
| 1373 | + * otherwise. |
---|
| 1374 | + * |
---|
| 1375 | + * Initialize vmci doorbell, to get notified as soon as balloon changes. |
---|
959 | 1376 | */ |
---|
960 | 1377 | static int vmballoon_vmci_init(struct vmballoon *b) |
---|
961 | 1378 | { |
---|
962 | | - unsigned long error, dummy; |
---|
| 1379 | + unsigned long error; |
---|
963 | 1380 | |
---|
964 | 1381 | if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) |
---|
965 | 1382 | return 0; |
---|
.. | .. |
---|
971 | 1388 | if (error != VMCI_SUCCESS) |
---|
972 | 1389 | goto fail; |
---|
973 | 1390 | |
---|
974 | | - error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context, |
---|
975 | | - b->vmci_doorbell.resource, dummy); |
---|
976 | | - |
---|
977 | | - STATS_INC(b->stats.doorbell_set); |
---|
| 1391 | + error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, |
---|
| 1392 | + b->vmci_doorbell.context, |
---|
| 1393 | + b->vmci_doorbell.resource, NULL); |
---|
978 | 1394 | |
---|
979 | 1395 | if (error != VMW_BALLOON_SUCCESS) |
---|
980 | 1396 | goto fail; |
---|
.. | .. |
---|
983 | 1399 | fail: |
---|
984 | 1400 | vmballoon_vmci_cleanup(b); |
---|
985 | 1401 | return -EIO; |
---|
| 1402 | +} |
---|
| 1403 | + |
---|
| 1404 | +/** |
---|
| 1405 | + * vmballoon_pop - Quickly release all pages allocate for the balloon. |
---|
| 1406 | + * |
---|
| 1407 | + * @b: pointer to the balloon. |
---|
| 1408 | + * |
---|
| 1409 | + * This function is called when host decides to "reset" balloon for one reason |
---|
| 1410 | + * or another. Unlike normal "deflate" we do not (shall not) notify host of the |
---|
| 1411 | + * pages being released. |
---|
| 1412 | + */ |
---|
| 1413 | +static void vmballoon_pop(struct vmballoon *b) |
---|
| 1414 | +{ |
---|
| 1415 | + unsigned long size; |
---|
| 1416 | + |
---|
| 1417 | + while ((size = atomic64_read(&b->size))) |
---|
| 1418 | + vmballoon_deflate(b, size, false); |
---|
986 | 1419 | } |
---|
987 | 1420 | |
---|
988 | 1421 | /* |
---|
.. | .. |
---|
994 | 1427 | { |
---|
995 | 1428 | int error; |
---|
996 | 1429 | |
---|
| 1430 | + down_write(&b->conf_sem); |
---|
| 1431 | + |
---|
997 | 1432 | vmballoon_vmci_cleanup(b); |
---|
998 | 1433 | |
---|
999 | 1434 | /* free all pages, skipping monitor unlock */ |
---|
1000 | 1435 | vmballoon_pop(b); |
---|
1001 | 1436 | |
---|
1002 | | - if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) |
---|
1003 | | - return; |
---|
| 1437 | + if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) |
---|
| 1438 | + goto unlock; |
---|
1004 | 1439 | |
---|
1005 | 1440 | if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { |
---|
1006 | | - b->ops = &vmballoon_batched_ops; |
---|
1007 | | - b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES; |
---|
1008 | | - if (!vmballoon_init_batching(b)) { |
---|
| 1441 | + if (vmballoon_init_batching(b)) { |
---|
1009 | 1442 | /* |
---|
1010 | 1443 | * We failed to initialize batching, inform the monitor |
---|
1011 | 1444 | * about it by sending a null capability. |
---|
.. | .. |
---|
1013 | 1446 | * The guest will retry in one second. |
---|
1014 | 1447 | */ |
---|
1015 | 1448 | vmballoon_send_start(b, 0); |
---|
1016 | | - return; |
---|
| 1449 | + goto unlock; |
---|
1017 | 1450 | } |
---|
1018 | 1451 | } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { |
---|
1019 | | - b->ops = &vmballoon_basic_ops; |
---|
1020 | | - b->batch_max_pages = 1; |
---|
| 1452 | + vmballoon_deinit_batching(b); |
---|
1021 | 1453 | } |
---|
1022 | 1454 | |
---|
| 1455 | + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET); |
---|
1023 | 1456 | b->reset_required = false; |
---|
1024 | 1457 | |
---|
1025 | 1458 | error = vmballoon_vmci_init(b); |
---|
1026 | 1459 | if (error) |
---|
1027 | 1460 | pr_err("failed to initialize vmci doorbell\n"); |
---|
1028 | 1461 | |
---|
1029 | | - if (!vmballoon_send_guest_id(b)) |
---|
| 1462 | + if (vmballoon_send_guest_id(b)) |
---|
1030 | 1463 | pr_err("failed to send guest ID to the host\n"); |
---|
| 1464 | + |
---|
| 1465 | +unlock: |
---|
| 1466 | + up_write(&b->conf_sem); |
---|
1031 | 1467 | } |
---|
1032 | 1468 | |
---|
1033 | | -/* |
---|
1034 | | - * Balloon work function: reset protocol, if needed, get the new size and |
---|
1035 | | - * adjust balloon as needed. Repeat in 1 sec. |
---|
| 1469 | +/** |
---|
| 1470 | + * vmballoon_work - periodic balloon worker for reset, inflation and deflation. |
---|
| 1471 | + * |
---|
| 1472 | + * @work: pointer to the &work_struct which is provided by the workqueue. |
---|
| 1473 | + * |
---|
| 1474 | + * Resets the protocol if needed, gets the new size and adjusts balloon as |
---|
| 1475 | + * needed. Repeat in 1 sec. |
---|
1036 | 1476 | */ |
---|
1037 | 1477 | static void vmballoon_work(struct work_struct *work) |
---|
1038 | 1478 | { |
---|
1039 | 1479 | struct delayed_work *dwork = to_delayed_work(work); |
---|
1040 | 1480 | struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); |
---|
1041 | | - unsigned int target; |
---|
1042 | | - |
---|
1043 | | - STATS_INC(b->stats.timer); |
---|
| 1481 | + int64_t change = 0; |
---|
1044 | 1482 | |
---|
1045 | 1483 | if (b->reset_required) |
---|
1046 | 1484 | vmballoon_reset(b); |
---|
1047 | 1485 | |
---|
1048 | | - if (!b->reset_required && vmballoon_send_get_target(b, &target)) { |
---|
1049 | | - /* update target, adjust size */ |
---|
1050 | | - b->target = target; |
---|
| 1486 | + down_read(&b->conf_sem); |
---|
1051 | 1487 | |
---|
1052 | | - if (b->size < target) |
---|
| 1488 | + /* |
---|
| 1489 | + * Update the stats while holding the semaphore to ensure that |
---|
| 1490 | + * @stats_enabled is consistent with whether the stats are actually |
---|
| 1491 | + * enabled |
---|
| 1492 | + */ |
---|
| 1493 | + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER); |
---|
| 1494 | + |
---|
| 1495 | + if (!vmballoon_send_get_target(b)) |
---|
| 1496 | + change = vmballoon_change(b); |
---|
| 1497 | + |
---|
| 1498 | + if (change != 0) { |
---|
| 1499 | + pr_debug("%s - size: %llu, target %lu\n", __func__, |
---|
| 1500 | + atomic64_read(&b->size), READ_ONCE(b->target)); |
---|
| 1501 | + |
---|
| 1502 | + if (change > 0) |
---|
1053 | 1503 | vmballoon_inflate(b); |
---|
1054 | | - else if (target == 0 || |
---|
1055 | | - b->size > target + vmballoon_page_size(true)) |
---|
1056 | | - vmballoon_deflate(b); |
---|
| 1504 | + else /* (change < 0) */ |
---|
| 1505 | + vmballoon_deflate(b, 0, true); |
---|
1057 | 1506 | } |
---|
| 1507 | + |
---|
| 1508 | + up_read(&b->conf_sem); |
---|
1058 | 1509 | |
---|
1059 | 1510 | /* |
---|
1060 | 1511 | * We are using a freezable workqueue so that balloon operations are |
---|
.. | .. |
---|
1062 | 1513 | */ |
---|
1063 | 1514 | queue_delayed_work(system_freezable_wq, |
---|
1064 | 1515 | dwork, round_jiffies_relative(HZ)); |
---|
| 1516 | + |
---|
| 1517 | +} |
---|
| 1518 | + |
---|
| 1519 | +/** |
---|
| 1520 | + * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure. |
---|
| 1521 | + * @shrinker: pointer to the balloon shrinker. |
---|
| 1522 | + * @sc: page reclaim information. |
---|
| 1523 | + * |
---|
| 1524 | + * Returns: number of pages that were freed during deflation. |
---|
| 1525 | + */ |
---|
| 1526 | +static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker, |
---|
| 1527 | + struct shrink_control *sc) |
---|
| 1528 | +{ |
---|
| 1529 | + struct vmballoon *b = &balloon; |
---|
| 1530 | + unsigned long deflated_frames; |
---|
| 1531 | + |
---|
| 1532 | + pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size)); |
---|
| 1533 | + |
---|
| 1534 | + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK); |
---|
| 1535 | + |
---|
| 1536 | + /* |
---|
| 1537 | + * If the lock is also contended for read, we cannot easily reclaim and |
---|
| 1538 | + * we bail out. |
---|
| 1539 | + */ |
---|
| 1540 | + if (!down_read_trylock(&b->conf_sem)) |
---|
| 1541 | + return 0; |
---|
| 1542 | + |
---|
| 1543 | + deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true); |
---|
| 1544 | + |
---|
| 1545 | + vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE, |
---|
| 1546 | + deflated_frames); |
---|
| 1547 | + |
---|
| 1548 | + /* |
---|
| 1549 | + * Delay future inflation for some time to mitigate the situations in |
---|
| 1550 | + * which balloon continuously grows and shrinks. Use WRITE_ONCE() since |
---|
| 1551 | + * the access is asynchronous. |
---|
| 1552 | + */ |
---|
| 1553 | + WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY); |
---|
| 1554 | + |
---|
| 1555 | + up_read(&b->conf_sem); |
---|
| 1556 | + |
---|
| 1557 | + return deflated_frames; |
---|
| 1558 | +} |
---|
| 1559 | + |
---|
| 1560 | +/** |
---|
| 1561 | + * vmballoon_shrinker_count() - return the number of ballooned pages. |
---|
| 1562 | + * @shrinker: pointer to the balloon shrinker. |
---|
| 1563 | + * @sc: page reclaim information. |
---|
| 1564 | + * |
---|
| 1565 | + * Returns: number of 4k pages that are allocated for the balloon and can |
---|
| 1566 | + * therefore be reclaimed under pressure. |
---|
| 1567 | + */ |
---|
| 1568 | +static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker, |
---|
| 1569 | + struct shrink_control *sc) |
---|
| 1570 | +{ |
---|
| 1571 | + struct vmballoon *b = &balloon; |
---|
| 1572 | + |
---|
| 1573 | + return atomic64_read(&b->size); |
---|
| 1574 | +} |
---|
| 1575 | + |
---|
| 1576 | +static void vmballoon_unregister_shrinker(struct vmballoon *b) |
---|
| 1577 | +{ |
---|
| 1578 | + if (b->shrinker_registered) |
---|
| 1579 | + unregister_shrinker(&b->shrinker); |
---|
| 1580 | + b->shrinker_registered = false; |
---|
| 1581 | +} |
---|
| 1582 | + |
---|
| 1583 | +static int vmballoon_register_shrinker(struct vmballoon *b) |
---|
| 1584 | +{ |
---|
| 1585 | + int r; |
---|
| 1586 | + |
---|
| 1587 | + /* Do nothing if the shrinker is not enabled */ |
---|
| 1588 | + if (!vmwballoon_shrinker_enable) |
---|
| 1589 | + return 0; |
---|
| 1590 | + |
---|
| 1591 | + b->shrinker.scan_objects = vmballoon_shrinker_scan; |
---|
| 1592 | + b->shrinker.count_objects = vmballoon_shrinker_count; |
---|
| 1593 | + b->shrinker.seeks = DEFAULT_SEEKS; |
---|
| 1594 | + |
---|
| 1595 | + r = register_shrinker(&b->shrinker); |
---|
| 1596 | + |
---|
| 1597 | + if (r == 0) |
---|
| 1598 | + b->shrinker_registered = true; |
---|
| 1599 | + |
---|
| 1600 | + return r; |
---|
1065 | 1601 | } |
---|
1066 | 1602 | |
---|
1067 | 1603 | /* |
---|
.. | .. |
---|
1069 | 1605 | */ |
---|
1070 | 1606 | #ifdef CONFIG_DEBUG_FS |
---|
1071 | 1607 | |
---|
| 1608 | +static const char * const vmballoon_stat_page_names[] = { |
---|
| 1609 | + [VMW_BALLOON_PAGE_STAT_ALLOC] = "alloc", |
---|
| 1610 | + [VMW_BALLOON_PAGE_STAT_ALLOC_FAIL] = "allocFail", |
---|
| 1611 | + [VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC] = "errAlloc", |
---|
| 1612 | + [VMW_BALLOON_PAGE_STAT_REFUSED_FREE] = "errFree", |
---|
| 1613 | + [VMW_BALLOON_PAGE_STAT_FREE] = "free" |
---|
| 1614 | +}; |
---|
| 1615 | + |
---|
| 1616 | +static const char * const vmballoon_stat_names[] = { |
---|
| 1617 | + [VMW_BALLOON_STAT_TIMER] = "timer", |
---|
| 1618 | + [VMW_BALLOON_STAT_DOORBELL] = "doorbell", |
---|
| 1619 | + [VMW_BALLOON_STAT_RESET] = "reset", |
---|
| 1620 | + [VMW_BALLOON_STAT_SHRINK] = "shrink", |
---|
| 1621 | + [VMW_BALLOON_STAT_SHRINK_FREE] = "shrinkFree" |
---|
| 1622 | +}; |
---|
| 1623 | + |
---|
| 1624 | +static int vmballoon_enable_stats(struct vmballoon *b) |
---|
| 1625 | +{ |
---|
| 1626 | + int r = 0; |
---|
| 1627 | + |
---|
| 1628 | + down_write(&b->conf_sem); |
---|
| 1629 | + |
---|
| 1630 | + /* did we somehow race with another reader which enabled stats? */ |
---|
| 1631 | + if (b->stats) |
---|
| 1632 | + goto out; |
---|
| 1633 | + |
---|
| 1634 | + b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL); |
---|
| 1635 | + |
---|
| 1636 | + if (!b->stats) { |
---|
| 1637 | + /* allocation failed */ |
---|
| 1638 | + r = -ENOMEM; |
---|
| 1639 | + goto out; |
---|
| 1640 | + } |
---|
| 1641 | + static_key_enable(&balloon_stat_enabled.key); |
---|
| 1642 | +out: |
---|
| 1643 | + up_write(&b->conf_sem); |
---|
| 1644 | + return r; |
---|
| 1645 | +} |
---|
| 1646 | + |
---|
| 1647 | +/** |
---|
| 1648 | + * vmballoon_debug_show - shows statistics of balloon operations. |
---|
| 1649 | + * @f: pointer to the &struct seq_file. |
---|
| 1650 | + * @offset: ignored. |
---|
| 1651 | + * |
---|
| 1652 | + * Provides the statistics that can be accessed in vmmemctl in the debugfs. |
---|
| 1653 | + * To avoid the overhead - mainly that of memory - of collecting the statistics, |
---|
| 1654 | + * we only collect statistics after the first time the counters are read. |
---|
| 1655 | + * |
---|
| 1656 | + * Return: zero on success or an error code. |
---|
| 1657 | + */ |
---|
1072 | 1658 | static int vmballoon_debug_show(struct seq_file *f, void *offset) |
---|
1073 | 1659 | { |
---|
1074 | 1660 | struct vmballoon *b = f->private; |
---|
1075 | | - struct vmballoon_stats *stats = &b->stats; |
---|
| 1661 | + int i, j; |
---|
| 1662 | + |
---|
| 1663 | + /* enables stats if they are disabled */ |
---|
| 1664 | + if (!b->stats) { |
---|
| 1665 | + int r = vmballoon_enable_stats(b); |
---|
| 1666 | + |
---|
| 1667 | + if (r) |
---|
| 1668 | + return r; |
---|
| 1669 | + } |
---|
1076 | 1670 | |
---|
1077 | 1671 | /* format capabilities info */ |
---|
1078 | | - seq_printf(f, |
---|
1079 | | - "balloon capabilities: %#4x\n" |
---|
1080 | | - "used capabilities: %#4lx\n" |
---|
1081 | | - "is resetting: %c\n", |
---|
1082 | | - VMW_BALLOON_CAPABILITIES, b->capabilities, |
---|
1083 | | - b->reset_required ? 'y' : 'n'); |
---|
| 1672 | + seq_printf(f, "%-22s: %#16x\n", "balloon capabilities", |
---|
| 1673 | + VMW_BALLOON_CAPABILITIES); |
---|
| 1674 | + seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities); |
---|
| 1675 | + seq_printf(f, "%-22s: %16s\n", "is resetting", |
---|
| 1676 | + b->reset_required ? "y" : "n"); |
---|
1084 | 1677 | |
---|
1085 | 1678 | /* format size info */ |
---|
1086 | | - seq_printf(f, |
---|
1087 | | - "target: %8d pages\n" |
---|
1088 | | - "current: %8d pages\n", |
---|
1089 | | - b->target, b->size); |
---|
| 1679 | + seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target)); |
---|
| 1680 | + seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size)); |
---|
1090 | 1681 | |
---|
1091 | | - seq_printf(f, |
---|
1092 | | - "\n" |
---|
1093 | | - "timer: %8u\n" |
---|
1094 | | - "doorbell: %8u\n" |
---|
1095 | | - "start: %8u (%4u failed)\n" |
---|
1096 | | - "guestType: %8u (%4u failed)\n" |
---|
1097 | | - "2m-lock: %8u (%4u failed)\n" |
---|
1098 | | - "lock: %8u (%4u failed)\n" |
---|
1099 | | - "2m-unlock: %8u (%4u failed)\n" |
---|
1100 | | - "unlock: %8u (%4u failed)\n" |
---|
1101 | | - "target: %8u (%4u failed)\n" |
---|
1102 | | - "prim2mAlloc: %8u (%4u failed)\n" |
---|
1103 | | - "primNoSleepAlloc: %8u (%4u failed)\n" |
---|
1104 | | - "primCanSleepAlloc: %8u (%4u failed)\n" |
---|
1105 | | - "prim2mFree: %8u\n" |
---|
1106 | | - "primFree: %8u\n" |
---|
1107 | | - "err2mAlloc: %8u\n" |
---|
1108 | | - "errAlloc: %8u\n" |
---|
1109 | | - "err2mFree: %8u\n" |
---|
1110 | | - "errFree: %8u\n" |
---|
1111 | | - "doorbellSet: %8u\n" |
---|
1112 | | - "doorbellUnset: %8u\n", |
---|
1113 | | - stats->timer, |
---|
1114 | | - stats->doorbell, |
---|
1115 | | - stats->start, stats->start_fail, |
---|
1116 | | - stats->guest_type, stats->guest_type_fail, |
---|
1117 | | - stats->lock[true], stats->lock_fail[true], |
---|
1118 | | - stats->lock[false], stats->lock_fail[false], |
---|
1119 | | - stats->unlock[true], stats->unlock_fail[true], |
---|
1120 | | - stats->unlock[false], stats->unlock_fail[false], |
---|
1121 | | - stats->target, stats->target_fail, |
---|
1122 | | - stats->alloc[true], stats->alloc_fail[true], |
---|
1123 | | - stats->alloc[false], stats->alloc_fail[false], |
---|
1124 | | - stats->sleep_alloc, stats->sleep_alloc_fail, |
---|
1125 | | - stats->free[true], |
---|
1126 | | - stats->free[false], |
---|
1127 | | - stats->refused_alloc[true], stats->refused_alloc[false], |
---|
1128 | | - stats->refused_free[true], stats->refused_free[false], |
---|
1129 | | - stats->doorbell_set, stats->doorbell_unset); |
---|
| 1682 | + for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) { |
---|
| 1683 | + if (vmballoon_cmd_names[i] == NULL) |
---|
| 1684 | + continue; |
---|
1130 | 1685 | |
---|
1131 | | - return 0; |
---|
1132 | | -} |
---|
| 1686 | + seq_printf(f, "%-22s: %16llu (%llu failed)\n", |
---|
| 1687 | + vmballoon_cmd_names[i], |
---|
| 1688 | + atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]), |
---|
| 1689 | + atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT])); |
---|
| 1690 | + } |
---|
1133 | 1691 | |
---|
1134 | | -static int vmballoon_debug_open(struct inode *inode, struct file *file) |
---|
1135 | | -{ |
---|
1136 | | - return single_open(file, vmballoon_debug_show, inode->i_private); |
---|
1137 | | -} |
---|
| 1692 | + for (i = 0; i < VMW_BALLOON_STAT_NUM; i++) |
---|
| 1693 | + seq_printf(f, "%-22s: %16llu\n", |
---|
| 1694 | + vmballoon_stat_names[i], |
---|
| 1695 | + atomic64_read(&b->stats->general_stat[i])); |
---|
1138 | 1696 | |
---|
1139 | | -static const struct file_operations vmballoon_debug_fops = { |
---|
1140 | | - .owner = THIS_MODULE, |
---|
1141 | | - .open = vmballoon_debug_open, |
---|
1142 | | - .read = seq_read, |
---|
1143 | | - .llseek = seq_lseek, |
---|
1144 | | - .release = single_release, |
---|
1145 | | -}; |
---|
1146 | | - |
---|
1147 | | -static int __init vmballoon_debugfs_init(struct vmballoon *b) |
---|
1148 | | -{ |
---|
1149 | | - int error; |
---|
1150 | | - |
---|
1151 | | - b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, |
---|
1152 | | - &vmballoon_debug_fops); |
---|
1153 | | - if (IS_ERR(b->dbg_entry)) { |
---|
1154 | | - error = PTR_ERR(b->dbg_entry); |
---|
1155 | | - pr_err("failed to create debugfs entry, error: %d\n", error); |
---|
1156 | | - return error; |
---|
| 1697 | + for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) { |
---|
| 1698 | + for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++) |
---|
| 1699 | + seq_printf(f, "%-18s(%s): %16llu\n", |
---|
| 1700 | + vmballoon_stat_page_names[i], |
---|
| 1701 | + vmballoon_page_size_names[j], |
---|
| 1702 | + atomic64_read(&b->stats->page_stat[i][j])); |
---|
1157 | 1703 | } |
---|
1158 | 1704 | |
---|
1159 | 1705 | return 0; |
---|
1160 | 1706 | } |
---|
1161 | 1707 | |
---|
| 1708 | +DEFINE_SHOW_ATTRIBUTE(vmballoon_debug); |
---|
| 1709 | + |
---|
| 1710 | +static void __init vmballoon_debugfs_init(struct vmballoon *b) |
---|
| 1711 | +{ |
---|
| 1712 | + b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, |
---|
| 1713 | + &vmballoon_debug_fops); |
---|
| 1714 | +} |
---|
| 1715 | + |
---|
1162 | 1716 | static void __exit vmballoon_debugfs_exit(struct vmballoon *b) |
---|
1163 | 1717 | { |
---|
| 1718 | + static_key_disable(&balloon_stat_enabled.key); |
---|
1164 | 1719 | debugfs_remove(b->dbg_entry); |
---|
| 1720 | + kfree(b->stats); |
---|
| 1721 | + b->stats = NULL; |
---|
1165 | 1722 | } |
---|
1166 | 1723 | |
---|
1167 | 1724 | #else |
---|
1168 | 1725 | |
---|
1169 | | -static inline int vmballoon_debugfs_init(struct vmballoon *b) |
---|
| 1726 | +static inline void vmballoon_debugfs_init(struct vmballoon *b) |
---|
1170 | 1727 | { |
---|
1171 | | - return 0; |
---|
1172 | 1728 | } |
---|
1173 | 1729 | |
---|
1174 | 1730 | static inline void vmballoon_debugfs_exit(struct vmballoon *b) |
---|
.. | .. |
---|
1177 | 1733 | |
---|
1178 | 1734 | #endif /* CONFIG_DEBUG_FS */ |
---|
1179 | 1735 | |
---|
| 1736 | + |
---|
| 1737 | +#ifdef CONFIG_BALLOON_COMPACTION |
---|
| 1738 | + |
---|
| 1739 | +static int vmballoon_init_fs_context(struct fs_context *fc) |
---|
| 1740 | +{ |
---|
| 1741 | + return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM; |
---|
| 1742 | +} |
---|
| 1743 | + |
---|
| 1744 | +static struct file_system_type vmballoon_fs = { |
---|
| 1745 | + .name = "balloon-vmware", |
---|
| 1746 | + .init_fs_context = vmballoon_init_fs_context, |
---|
| 1747 | + .kill_sb = kill_anon_super, |
---|
| 1748 | +}; |
---|
| 1749 | + |
---|
| 1750 | +static struct vfsmount *vmballoon_mnt; |
---|
| 1751 | + |
---|
| 1752 | +/** |
---|
| 1753 | + * vmballoon_migratepage() - migrates a balloon page. |
---|
| 1754 | + * @b_dev_info: balloon device information descriptor. |
---|
| 1755 | + * @newpage: the page to which @page should be migrated. |
---|
| 1756 | + * @page: a ballooned page that should be migrated. |
---|
| 1757 | + * @mode: migration mode, ignored. |
---|
| 1758 | + * |
---|
| 1759 | + * This function is really open-coded, but that is according to the interface |
---|
| 1760 | + * that balloon_compaction provides. |
---|
| 1761 | + * |
---|
| 1762 | + * Return: zero on success, -EAGAIN when migration cannot be performed |
---|
| 1763 | + * momentarily, and -EBUSY if migration failed and should be retried |
---|
| 1764 | + * with that specific page. |
---|
| 1765 | + */ |
---|
| 1766 | +static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, |
---|
| 1767 | + struct page *newpage, struct page *page, |
---|
| 1768 | + enum migrate_mode mode) |
---|
| 1769 | +{ |
---|
| 1770 | + unsigned long status, flags; |
---|
| 1771 | + struct vmballoon *b; |
---|
| 1772 | + int ret; |
---|
| 1773 | + |
---|
| 1774 | + b = container_of(b_dev_info, struct vmballoon, b_dev_info); |
---|
| 1775 | + |
---|
| 1776 | + /* |
---|
| 1777 | + * If the semaphore is taken, there is ongoing configuration change |
---|
| 1778 | + * (i.e., balloon reset), so try again. |
---|
| 1779 | + */ |
---|
| 1780 | + if (!down_read_trylock(&b->conf_sem)) |
---|
| 1781 | + return -EAGAIN; |
---|
| 1782 | + |
---|
| 1783 | + spin_lock(&b->comm_lock); |
---|
| 1784 | + /* |
---|
| 1785 | + * We must start by deflating and not inflating, as otherwise the |
---|
| 1786 | + * hypervisor may tell us that it has enough memory and the new page is |
---|
| 1787 | + * not needed. Since the old page is isolated, we cannot use the list |
---|
| 1788 | + * interface to unlock it, as the LRU field is used for isolation. |
---|
| 1789 | + * Instead, we use the native interface directly. |
---|
| 1790 | + */ |
---|
| 1791 | + vmballoon_add_page(b, 0, page); |
---|
| 1792 | + status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE, |
---|
| 1793 | + VMW_BALLOON_DEFLATE); |
---|
| 1794 | + |
---|
| 1795 | + if (status == VMW_BALLOON_SUCCESS) |
---|
| 1796 | + status = vmballoon_status_page(b, 0, &page); |
---|
| 1797 | + |
---|
| 1798 | + /* |
---|
| 1799 | + * If a failure happened, let the migration mechanism know that it |
---|
| 1800 | + * should not retry. |
---|
| 1801 | + */ |
---|
| 1802 | + if (status != VMW_BALLOON_SUCCESS) { |
---|
| 1803 | + spin_unlock(&b->comm_lock); |
---|
| 1804 | + ret = -EBUSY; |
---|
| 1805 | + goto out_unlock; |
---|
| 1806 | + } |
---|
| 1807 | + |
---|
| 1808 | + /* |
---|
| 1809 | + * The page is isolated, so it is safe to delete it without holding |
---|
| 1810 | + * @pages_lock . We keep holding @comm_lock since we will need it in a |
---|
| 1811 | + * second. |
---|
| 1812 | + */ |
---|
| 1813 | + balloon_page_delete(page); |
---|
| 1814 | + |
---|
| 1815 | + put_page(page); |
---|
| 1816 | + |
---|
| 1817 | + /* Inflate */ |
---|
| 1818 | + vmballoon_add_page(b, 0, newpage); |
---|
| 1819 | + status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE, |
---|
| 1820 | + VMW_BALLOON_INFLATE); |
---|
| 1821 | + |
---|
| 1822 | + if (status == VMW_BALLOON_SUCCESS) |
---|
| 1823 | + status = vmballoon_status_page(b, 0, &newpage); |
---|
| 1824 | + |
---|
| 1825 | + spin_unlock(&b->comm_lock); |
---|
| 1826 | + |
---|
| 1827 | + if (status != VMW_BALLOON_SUCCESS) { |
---|
| 1828 | + /* |
---|
| 1829 | + * A failure happened. While we can deflate the page we just |
---|
| 1830 | + * inflated, this deflation can also encounter an error. Instead |
---|
| 1831 | + * we will decrease the size of the balloon to reflect the |
---|
| 1832 | + * change and report failure. |
---|
| 1833 | + */ |
---|
| 1834 | + atomic64_dec(&b->size); |
---|
| 1835 | + ret = -EBUSY; |
---|
| 1836 | + } else { |
---|
| 1837 | + /* |
---|
| 1838 | + * Success. Take a reference for the page, and we will add it to |
---|
| 1839 | + * the list after acquiring the lock. |
---|
| 1840 | + */ |
---|
| 1841 | + get_page(newpage); |
---|
| 1842 | + ret = MIGRATEPAGE_SUCCESS; |
---|
| 1843 | + } |
---|
| 1844 | + |
---|
| 1845 | + /* Update the balloon list under the @pages_lock */ |
---|
| 1846 | + spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); |
---|
| 1847 | + |
---|
| 1848 | + /* |
---|
| 1849 | + * On inflation success, we already took a reference for the @newpage. |
---|
| 1850 | + * If we succeed just insert it to the list and update the statistics |
---|
| 1851 | + * under the lock. |
---|
| 1852 | + */ |
---|
| 1853 | + if (ret == MIGRATEPAGE_SUCCESS) { |
---|
| 1854 | + balloon_page_insert(&b->b_dev_info, newpage); |
---|
| 1855 | + __count_vm_event(BALLOON_MIGRATE); |
---|
| 1856 | + } |
---|
| 1857 | + |
---|
| 1858 | + /* |
---|
| 1859 | + * We deflated successfully, so regardless to the inflation success, we |
---|
| 1860 | + * need to reduce the number of isolated_pages. |
---|
| 1861 | + */ |
---|
| 1862 | + b->b_dev_info.isolated_pages--; |
---|
| 1863 | + spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags); |
---|
| 1864 | + |
---|
| 1865 | +out_unlock: |
---|
| 1866 | + up_read(&b->conf_sem); |
---|
| 1867 | + return ret; |
---|
| 1868 | +} |
---|
| 1869 | + |
---|
| 1870 | +/** |
---|
| 1871 | + * vmballoon_compaction_deinit() - removes compaction related data. |
---|
| 1872 | + * |
---|
| 1873 | + * @b: pointer to the balloon. |
---|
| 1874 | + */ |
---|
| 1875 | +static void vmballoon_compaction_deinit(struct vmballoon *b) |
---|
| 1876 | +{ |
---|
| 1877 | + if (!IS_ERR(b->b_dev_info.inode)) |
---|
| 1878 | + iput(b->b_dev_info.inode); |
---|
| 1879 | + |
---|
| 1880 | + b->b_dev_info.inode = NULL; |
---|
| 1881 | + kern_unmount(vmballoon_mnt); |
---|
| 1882 | + vmballoon_mnt = NULL; |
---|
| 1883 | +} |
---|
| 1884 | + |
---|
| 1885 | +/** |
---|
| 1886 | + * vmballoon_compaction_init() - initialized compaction for the balloon. |
---|
| 1887 | + * |
---|
| 1888 | + * @b: pointer to the balloon. |
---|
| 1889 | + * |
---|
| 1890 | + * If during the initialization a failure occurred, this function does not |
---|
| 1891 | + * perform cleanup. The caller must call vmballoon_compaction_deinit() in this |
---|
| 1892 | + * case. |
---|
| 1893 | + * |
---|
| 1894 | + * Return: zero on success or error code on failure. |
---|
| 1895 | + */ |
---|
| 1896 | +static __init int vmballoon_compaction_init(struct vmballoon *b) |
---|
| 1897 | +{ |
---|
| 1898 | + vmballoon_mnt = kern_mount(&vmballoon_fs); |
---|
| 1899 | + if (IS_ERR(vmballoon_mnt)) |
---|
| 1900 | + return PTR_ERR(vmballoon_mnt); |
---|
| 1901 | + |
---|
| 1902 | + b->b_dev_info.migratepage = vmballoon_migratepage; |
---|
| 1903 | + b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb); |
---|
| 1904 | + |
---|
| 1905 | + if (IS_ERR(b->b_dev_info.inode)) |
---|
| 1906 | + return PTR_ERR(b->b_dev_info.inode); |
---|
| 1907 | + |
---|
| 1908 | + b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops; |
---|
| 1909 | + return 0; |
---|
| 1910 | +} |
---|
| 1911 | + |
---|
| 1912 | +#else /* CONFIG_BALLOON_COMPACTION */ |
---|
| 1913 | + |
---|
| 1914 | +static void vmballoon_compaction_deinit(struct vmballoon *b) |
---|
| 1915 | +{ |
---|
| 1916 | +} |
---|
| 1917 | + |
---|
| 1918 | +static int vmballoon_compaction_init(struct vmballoon *b) |
---|
| 1919 | +{ |
---|
| 1920 | + return 0; |
---|
| 1921 | +} |
---|
| 1922 | + |
---|
| 1923 | +#endif /* CONFIG_BALLOON_COMPACTION */ |
---|
| 1924 | + |
---|
1180 | 1925 | static int __init vmballoon_init(void) |
---|
1181 | 1926 | { |
---|
1182 | 1927 | int error; |
---|
1183 | | - unsigned is_2m_pages; |
---|
| 1928 | + |
---|
1184 | 1929 | /* |
---|
1185 | 1930 | * Check if we are running on VMware's hypervisor and bail out |
---|
1186 | 1931 | * if we are not. |
---|
.. | .. |
---|
1188 | 1933 | if (x86_hyper_type != X86_HYPER_VMWARE) |
---|
1189 | 1934 | return -ENODEV; |
---|
1190 | 1935 | |
---|
1191 | | - for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; |
---|
1192 | | - is_2m_pages++) { |
---|
1193 | | - INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages); |
---|
1194 | | - INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages); |
---|
1195 | | - } |
---|
1196 | | - |
---|
1197 | 1936 | INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); |
---|
1198 | 1937 | |
---|
1199 | | - error = vmballoon_debugfs_init(&balloon); |
---|
| 1938 | + error = vmballoon_register_shrinker(&balloon); |
---|
1200 | 1939 | if (error) |
---|
1201 | | - return error; |
---|
| 1940 | + goto fail; |
---|
1202 | 1941 | |
---|
| 1942 | + /* |
---|
| 1943 | + * Initialization of compaction must be done after the call to |
---|
| 1944 | + * balloon_devinfo_init() . |
---|
| 1945 | + */ |
---|
| 1946 | + balloon_devinfo_init(&balloon.b_dev_info); |
---|
| 1947 | + error = vmballoon_compaction_init(&balloon); |
---|
| 1948 | + if (error) |
---|
| 1949 | + goto fail; |
---|
| 1950 | + |
---|
| 1951 | + INIT_LIST_HEAD(&balloon.huge_pages); |
---|
| 1952 | + spin_lock_init(&balloon.comm_lock); |
---|
| 1953 | + init_rwsem(&balloon.conf_sem); |
---|
1203 | 1954 | balloon.vmci_doorbell = VMCI_INVALID_HANDLE; |
---|
1204 | 1955 | balloon.batch_page = NULL; |
---|
1205 | 1956 | balloon.page = NULL; |
---|
.. | .. |
---|
1207 | 1958 | |
---|
1208 | 1959 | queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); |
---|
1209 | 1960 | |
---|
| 1961 | + vmballoon_debugfs_init(&balloon); |
---|
| 1962 | + |
---|
1210 | 1963 | return 0; |
---|
| 1964 | +fail: |
---|
| 1965 | + vmballoon_unregister_shrinker(&balloon); |
---|
| 1966 | + vmballoon_compaction_deinit(&balloon); |
---|
| 1967 | + return error; |
---|
1211 | 1968 | } |
---|
1212 | 1969 | |
---|
1213 | 1970 | /* |
---|
.. | .. |
---|
1220 | 1977 | |
---|
1221 | 1978 | static void __exit vmballoon_exit(void) |
---|
1222 | 1979 | { |
---|
| 1980 | + vmballoon_unregister_shrinker(&balloon); |
---|
1223 | 1981 | vmballoon_vmci_cleanup(&balloon); |
---|
1224 | 1982 | cancel_delayed_work_sync(&balloon.dwork); |
---|
1225 | 1983 | |
---|
.. | .. |
---|
1232 | 1990 | */ |
---|
1233 | 1991 | vmballoon_send_start(&balloon, 0); |
---|
1234 | 1992 | vmballoon_pop(&balloon); |
---|
| 1993 | + |
---|
| 1994 | + /* Only once we popped the balloon, compaction can be deinit */ |
---|
| 1995 | + vmballoon_compaction_deinit(&balloon); |
---|
1235 | 1996 | } |
---|
1236 | 1997 | module_exit(vmballoon_exit); |
---|