2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/mm/vmalloc.c
@@ -1,11 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * linux/mm/vmalloc.c
- *
  * Copyright (C) 1993 Linus Torvalds
  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
  * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
  * Numa awareness, Christoph Lameter, SGI, June 2005
+ * Improving global KVA allocator, Uladzislau Rezki, Sony, May 2019
  */
 
 #include <linux/vmalloc.h>
@@ -18,12 +18,13 @@
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/set_memory.h>
 #include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/rbtree.h>
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include <linux/rcupdate.h>
 #include <linux/pfn.h>
 #include <linux/kmemleak.h>
@@ -31,13 +32,24 @@
 #include <linux/compiler.h>
 #include <linux/llist.h>
 #include <linux/bitops.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/overflow.h>
+#include <trace/hooks/mm.h>
 
 #include <linux/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
 #include "internal.h"
+#include "pgalloc-track.h"
+
+bool is_vmalloc_addr(const void *x)
+{
+        unsigned long addr = (unsigned long)x;
+
+        return addr >= VMALLOC_START && addr < VMALLOC_END;
+}
+EXPORT_SYMBOL(is_vmalloc_addr);
 
 struct vfree_deferred {
         struct llist_head list;
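A minimal sketch of how the newly exported is_vmalloc_addr() is typically used
on the caller side (hypothetical helper, not part of this patch; it is the
pattern kvfree() follows):

/* Free a buffer that may have come from either kmalloc() or vmalloc(). */
static void example_release_buf(void *buf)
{
        if (is_vmalloc_addr(buf))
                vfree(buf);
        else
                kfree(buf);
}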
@@ -58,7 +70,8 @@
 
 /*** Page table manipulation functions ***/
 
-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+                             pgtbl_mod_mask *mask)
 {
         pte_t *pte;
 
@@ -67,73 +80,119 @@
6780 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
6881 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
6982 } while (pte++, addr += PAGE_SIZE, addr != end);
83
+ *mask |= PGTBL_PTE_MODIFIED;
7084 }
7185
72
-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
86
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
87
+ pgtbl_mod_mask *mask)
7388 {
7489 pmd_t *pmd;
7590 unsigned long next;
91
+ int cleared;
7692
7793 pmd = pmd_offset(pud, addr);
7894 do {
7995 next = pmd_addr_end(addr, end);
80
- if (pmd_clear_huge(pmd))
96
+
97
+ cleared = pmd_clear_huge(pmd);
98
+ if (cleared || pmd_bad(*pmd))
99
+ *mask |= PGTBL_PMD_MODIFIED;
100
+
101
+ if (cleared)
81102 continue;
82103 if (pmd_none_or_clear_bad(pmd))
83104 continue;
84
- vunmap_pte_range(pmd, addr, next);
105
+ vunmap_pte_range(pmd, addr, next, mask);
106
+
107
+ cond_resched();
85108 } while (pmd++, addr = next, addr != end);
86109 }
87110
88
-static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
111
+static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
112
+ pgtbl_mod_mask *mask)
89113 {
90114 pud_t *pud;
91115 unsigned long next;
116
+ int cleared;
92117
93118 pud = pud_offset(p4d, addr);
94119 do {
95120 next = pud_addr_end(addr, end);
96
- if (pud_clear_huge(pud))
121
+
122
+ cleared = pud_clear_huge(pud);
123
+ if (cleared || pud_bad(*pud))
124
+ *mask |= PGTBL_PUD_MODIFIED;
125
+
126
+ if (cleared)
97127 continue;
98128 if (pud_none_or_clear_bad(pud))
99129 continue;
100
- vunmap_pmd_range(pud, addr, next);
130
+ vunmap_pmd_range(pud, addr, next, mask);
101131 } while (pud++, addr = next, addr != end);
102132 }
103133
104
-static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
134
+static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
135
+ pgtbl_mod_mask *mask)
105136 {
106137 p4d_t *p4d;
107138 unsigned long next;
139
+ int cleared;
108140
109141 p4d = p4d_offset(pgd, addr);
110142 do {
111143 next = p4d_addr_end(addr, end);
112
- if (p4d_clear_huge(p4d))
144
+
145
+ cleared = p4d_clear_huge(p4d);
146
+ if (cleared || p4d_bad(*p4d))
147
+ *mask |= PGTBL_P4D_MODIFIED;
148
+
149
+ if (cleared)
113150 continue;
114151 if (p4d_none_or_clear_bad(p4d))
115152 continue;
116
- vunmap_pud_range(p4d, addr, next);
153
+ vunmap_pud_range(p4d, addr, next, mask);
117154 } while (p4d++, addr = next, addr != end);
118155 }
119156
120
-static void vunmap_page_range(unsigned long addr, unsigned long end)
157
+/**
158
+ * unmap_kernel_range_noflush - unmap kernel VM area
159
+ * @start: start of the VM area to unmap
160
+ * @size: size of the VM area to unmap
161
+ *
162
+ * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size specify
163
+ * should have been allocated using get_vm_area() and its friends.
164
+ *
165
+ * NOTE:
166
+ * This function does NOT do any cache flushing. The caller is responsible
167
+ * for calling flush_cache_vunmap() on to-be-mapped areas before calling this
168
+ * function and flush_tlb_kernel_range() after.
169
+ */
170
+void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
121171 {
122
- pgd_t *pgd;
172
+ unsigned long end = start + size;
123173 unsigned long next;
174
+ pgd_t *pgd;
175
+ unsigned long addr = start;
176
+ pgtbl_mod_mask mask = 0;
124177
125178 BUG_ON(addr >= end);
126179 pgd = pgd_offset_k(addr);
127180 do {
128181 next = pgd_addr_end(addr, end);
182
+ if (pgd_bad(*pgd))
183
+ mask |= PGTBL_PGD_MODIFIED;
129184 if (pgd_none_or_clear_bad(pgd))
130185 continue;
131
- vunmap_p4d_range(pgd, addr, next);
186
+ vunmap_p4d_range(pgd, addr, next, &mask);
132187 } while (pgd++, addr = next, addr != end);
188
+
189
+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
190
+ arch_sync_kernel_mappings(start, end);
133191 }
134192
135193 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
136
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
194
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
195
+ pgtbl_mod_mask *mask)
137196 {
138197 pte_t *pte;
139198
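A minimal sketch of the caller-side flush discipline the NOTE above describes
(illustrative only; unmap_kernel_range() later in this patch wraps exactly this
sequence):

static void example_unmap(unsigned long addr, unsigned long size)
{
        unsigned long end = addr + size;

        flush_cache_vunmap(addr, end);          /* before clearing PTEs */
        unmap_kernel_range_noflush(addr, size); /* clear the page tables */
        flush_tlb_kernel_range(addr, end);      /* after clearing PTEs */
}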
@@ -142,7 +201,7 @@
          * callers keep track of where we're up to.
          */
 
-        pte = pte_alloc_kernel(pmd, addr);
+        pte = pte_alloc_kernel_track(pmd, addr, mask);
         if (!pte)
                 return -ENOMEM;
         do {
@@ -155,96 +214,120 @@
155214 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
156215 (*nr)++;
157216 } while (pte++, addr += PAGE_SIZE, addr != end);
217
+ *mask |= PGTBL_PTE_MODIFIED;
158218 return 0;
159219 }
160220
161221 static int vmap_pmd_range(pud_t *pud, unsigned long addr,
162
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
222
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
223
+ pgtbl_mod_mask *mask)
163224 {
164225 pmd_t *pmd;
165226 unsigned long next;
166227
167
- pmd = pmd_alloc(&init_mm, pud, addr);
228
+ pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
168229 if (!pmd)
169230 return -ENOMEM;
170231 do {
171232 next = pmd_addr_end(addr, end);
172
- if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
233
+ if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
173234 return -ENOMEM;
174235 } while (pmd++, addr = next, addr != end);
175236 return 0;
176237 }
177238
178239 static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
179
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
240
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
241
+ pgtbl_mod_mask *mask)
180242 {
181243 pud_t *pud;
182244 unsigned long next;
183245
184
- pud = pud_alloc(&init_mm, p4d, addr);
246
+ pud = pud_alloc_track(&init_mm, p4d, addr, mask);
185247 if (!pud)
186248 return -ENOMEM;
187249 do {
188250 next = pud_addr_end(addr, end);
189
- if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
251
+ if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
190252 return -ENOMEM;
191253 } while (pud++, addr = next, addr != end);
192254 return 0;
193255 }
194256
195257 static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
196
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
258
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
259
+ pgtbl_mod_mask *mask)
197260 {
198261 p4d_t *p4d;
199262 unsigned long next;
200263
201
- p4d = p4d_alloc(&init_mm, pgd, addr);
264
+ p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
202265 if (!p4d)
203266 return -ENOMEM;
204267 do {
205268 next = p4d_addr_end(addr, end);
206
- if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
269
+ if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
207270 return -ENOMEM;
208271 } while (p4d++, addr = next, addr != end);
209272 return 0;
210273 }
211274
-/*
- * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
- * will have pfns corresponding to the "pages" array.
+/**
+ * map_kernel_range_noflush - map kernel VM area with the specified pages
+ * @addr: start of the VM area to map
+ * @size: size of the VM area to map
+ * @prot: page protection flags to use
+ * @pages: pages to map
  *
- * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
+ * Map PFN_UP(@size) pages at @addr. The VM area specified by @addr and @size
+ * should have been allocated using get_vm_area() and its friends.
+ *
+ * NOTE:
+ * This function does NOT do any cache flushing. The caller is responsible for
+ * calling flush_cache_vmap() on to-be-mapped areas before calling this
+ * function.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
  */
218
-static int vmap_page_range_noflush(unsigned long start, unsigned long end,
219
- pgprot_t prot, struct page **pages)
293
+int map_kernel_range_noflush(unsigned long addr, unsigned long size,
294
+ pgprot_t prot, struct page **pages)
220295 {
221
- pgd_t *pgd;
296
+ unsigned long start = addr;
297
+ unsigned long end = addr + size;
222298 unsigned long next;
223
- unsigned long addr = start;
299
+ pgd_t *pgd;
224300 int err = 0;
225301 int nr = 0;
302
+ pgtbl_mod_mask mask = 0;
226303
227304 BUG_ON(addr >= end);
228305 pgd = pgd_offset_k(addr);
229306 do {
230307 next = pgd_addr_end(addr, end);
231
- err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
308
+ if (pgd_bad(*pgd))
309
+ mask |= PGTBL_PGD_MODIFIED;
310
+ err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
232311 if (err)
233312 return err;
234313 } while (pgd++, addr = next, addr != end);
235314
236
- return nr;
315
+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
316
+ arch_sync_kernel_mappings(start, end);
317
+
318
+ return 0;
237319 }
238320
239
-static int vmap_page_range(unsigned long start, unsigned long end,
240
- pgprot_t prot, struct page **pages)
321
+int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
322
+ struct page **pages)
241323 {
242324 int ret;
243325
244
- ret = vmap_page_range_noflush(start, end, prot, pages);
245
- flush_cache_vmap(start, end);
326
+ ret = map_kernel_range_noflush(start, size, prot, pages);
327
+ flush_cache_vmap(start, start + size);
246328 return ret;
247329 }
330
+EXPORT_SYMBOL_GPL(map_kernel_range);
248331
249332 int is_vmalloc_or_module_addr(const void *x)
250333 {
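A hedged usage sketch of the reworked map_kernel_range(), which now returns
0 or -errno instead of a page count (hypothetical helper, error handling kept
minimal; this mirrors what vmap() does internally):

static void *example_map_pages(struct page **pages, unsigned long count)
{
        struct vm_struct *area;

        area = get_vm_area(count << PAGE_SHIFT, VM_MAP);
        if (!area)
                return NULL;

        if (map_kernel_range((unsigned long)area->addr, count << PAGE_SHIFT,
                             PAGE_KERNEL, pages) < 0) {
                free_vm_area(area);
                return NULL;
        }

        return area->addr;
}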
@@ -324,22 +407,83 @@
324407
325408 /*** Global kva allocator ***/
326409
327
-#define VM_LAZY_FREE 0x02
328
-#define VM_VM_AREA 0x04
410
+#define DEBUG_AUGMENT_PROPAGATE_CHECK 0
411
+#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
412
+
329413
330414 static DEFINE_SPINLOCK(vmap_area_lock);
415
+static DEFINE_SPINLOCK(free_vmap_area_lock);
331416 /* Export for kexec only */
332417 LIST_HEAD(vmap_area_list);
333418 static LLIST_HEAD(vmap_purge_list);
334419 static struct rb_root vmap_area_root = RB_ROOT;
420
+static bool vmap_initialized __read_mostly;
335421
336
-/* The vmap cache globals are protected by vmap_area_lock */
337
-static struct rb_node *free_vmap_cache;
338
-static unsigned long cached_hole_size;
339
-static unsigned long cached_vstart;
340
-static unsigned long cached_align;
422
+/*
423
+ * This kmem_cache is used for vmap_area objects. Instead of
424
+ * allocating from slab we reuse an object from this cache to
425
+ * make things faster. Especially in "no edge" splitting of
426
+ * free block.
427
+ */
428
+static struct kmem_cache *vmap_area_cachep;
341429
342
-static unsigned long vmap_area_pcpu_hole;
430
+/*
431
+ * This linked list is used in pair with free_vmap_area_root.
432
+ * It gives O(1) access to prev/next to perform fast coalescing.
433
+ */
434
+static LIST_HEAD(free_vmap_area_list);
435
+
436
+/*
437
+ * This augment red-black tree represents the free vmap space.
438
+ * All vmap_area objects in this tree are sorted by va->va_start
439
+ * address. It is used for allocation and merging when a vmap
440
+ * object is released.
441
+ *
442
+ * Each vmap_area node contains a maximum available free block
443
+ * of its sub-tree, right or left. Therefore it is possible to
444
+ * find a lowest match of free area.
445
+ */
446
+static struct rb_root free_vmap_area_root = RB_ROOT;
447
+
448
+/*
449
+ * Preload a CPU with one object for "no edge" split case. The
450
+ * aim is to get rid of allocations from the atomic context, thus
451
+ * to use more permissive allocation masks.
452
+ */
453
+static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
454
+
455
+static __always_inline unsigned long
456
+va_size(struct vmap_area *va)
457
+{
458
+ return (va->va_end - va->va_start);
459
+}
460
+
461
+static __always_inline unsigned long
462
+get_subtree_max_size(struct rb_node *node)
463
+{
464
+ struct vmap_area *va;
465
+
466
+ va = rb_entry_safe(node, struct vmap_area, rb_node);
467
+ return va ? va->subtree_max_size : 0;
468
+}
469
+
470
+/*
471
+ * Gets called when remove the node and rotate.
472
+ */
473
+static __always_inline unsigned long
474
+compute_subtree_max_size(struct vmap_area *va)
475
+{
476
+ return max3(va_size(va),
477
+ get_subtree_max_size(va->rb_node.rb_left),
478
+ get_subtree_max_size(va->rb_node.rb_right));
479
+}
480
+
481
+RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
482
+ struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
483
+
484
+static void purge_vmap_area_lazy(void);
485
+static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
486
+static unsigned long lazy_max_pages(void);
343487
344488 static atomic_long_t nr_vmalloc_pages;
345489
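A self-contained toy model of the subtree_max_size invariant described above
(illustrative only, not kernel code; names are made up): every node caches the
largest free-block size in its subtree, which is what lets the lowest-match
search prune whole subtrees.

struct toy_node {
        unsigned long size;             /* size of this free block */
        unsigned long subtree_max;      /* largest size in this subtree */
        struct toy_node *left, *right;
};

static unsigned long toy_subtree_max(const struct toy_node *n)
{
        return n ? n->subtree_max : 0;
}

/* Same idea as compute_subtree_max_size(): own size vs. both children. */
static void toy_update(struct toy_node *n)
{
        unsigned long m = n->size;

        if (toy_subtree_max(n->left) > m)
                m = toy_subtree_max(n->left);
        if (toy_subtree_max(n->right) > m)
                m = toy_subtree_max(n->right);
        n->subtree_max = m;
}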
@@ -347,6 +491,7 @@
 {
         return atomic_long_read(&nr_vmalloc_pages);
 }
+EXPORT_SYMBOL_GPL(vmalloc_nr_pages);
 
 static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
@@ -367,41 +512,638 @@
367512 return NULL;
368513 }
369514
370
-static void __insert_vmap_area(struct vmap_area *va)
515
+/*
516
+ * This function returns back addresses of parent node
517
+ * and its left or right link for further processing.
518
+ *
519
+ * Otherwise NULL is returned. In that case all further
520
+ * steps regarding inserting of conflicting overlap range
521
+ * have to be declined and actually considered as a bug.
522
+ */
523
+static __always_inline struct rb_node **
524
+find_va_links(struct vmap_area *va,
525
+ struct rb_root *root, struct rb_node *from,
526
+ struct rb_node **parent)
371527 {
372
- struct rb_node **p = &vmap_area_root.rb_node;
373
- struct rb_node *parent = NULL;
374
- struct rb_node *tmp;
528
+ struct vmap_area *tmp_va;
529
+ struct rb_node **link;
375530
376
- while (*p) {
377
- struct vmap_area *tmp_va;
378
-
379
- parent = *p;
380
- tmp_va = rb_entry(parent, struct vmap_area, rb_node);
381
- if (va->va_start < tmp_va->va_end)
382
- p = &(*p)->rb_left;
383
- else if (va->va_end > tmp_va->va_start)
384
- p = &(*p)->rb_right;
385
- else
386
- BUG();
531
+ if (root) {
532
+ link = &root->rb_node;
533
+ if (unlikely(!*link)) {
534
+ *parent = NULL;
535
+ return link;
536
+ }
537
+ } else {
538
+ link = &from;
387539 }
388540
389
- rb_link_node(&va->rb_node, parent, p);
390
- rb_insert_color(&va->rb_node, &vmap_area_root);
541
+ /*
+ * Go to the bottom of the tree. When we hit the last point
+ * we end up with the parent rb_node and the correct direction,
+ * named "link" here, where the new va->rb_node will be attached.
+ */
546
+ do {
547
+ tmp_va = rb_entry(*link, struct vmap_area, rb_node);
391548
392
- /* address-sort this list */
393
- tmp = rb_prev(&va->rb_node);
394
- if (tmp) {
395
- struct vmap_area *prev;
396
- prev = rb_entry(tmp, struct vmap_area, rb_node);
397
- list_add_rcu(&va->list, &prev->list);
398
- } else
399
- list_add_rcu(&va->list, &vmap_area_list);
549
+ /*
550
+ * During the traversal we also do some sanity check.
551
+ * Trigger the BUG() if there are sides(left/right)
552
+ * or full overlaps.
553
+ */
554
+ if (va->va_start < tmp_va->va_end &&
555
+ va->va_end <= tmp_va->va_start)
556
+ link = &(*link)->rb_left;
557
+ else if (va->va_end > tmp_va->va_start &&
558
+ va->va_start >= tmp_va->va_end)
559
+ link = &(*link)->rb_right;
560
+ else {
561
+ WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
562
+ va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);
563
+
564
+ return NULL;
565
+ }
566
+ } while (*link);
567
+
568
+ *parent = &tmp_va->rb_node;
569
+ return link;
400570 }
401571
402
-static void purge_vmap_area_lazy(void);
572
+static __always_inline struct list_head *
573
+get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
574
+{
575
+ struct list_head *list;
403576
404
-static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
577
+ if (unlikely(!parent))
578
+ /*
579
+ * The red-black tree where we try to find VA neighbors
580
+ * before merging or inserting is empty, i.e. it means
581
+ * there is no free vmap space. Normally it does not
582
+ * happen but we handle this case anyway.
583
+ */
584
+ return NULL;
585
+
586
+ list = &rb_entry(parent, struct vmap_area, rb_node)->list;
587
+ return (&parent->rb_right == link ? list->next : list);
588
+}
589
+
590
+static __always_inline void
591
+link_va(struct vmap_area *va, struct rb_root *root,
592
+ struct rb_node *parent, struct rb_node **link, struct list_head *head)
593
+{
594
+ /*
595
+ * VA is still not in the list, but we can
596
+ * identify its future previous list_head node.
597
+ */
598
+ if (likely(parent)) {
599
+ head = &rb_entry(parent, struct vmap_area, rb_node)->list;
600
+ if (&parent->rb_right != link)
601
+ head = head->prev;
602
+ }
603
+
604
+ /* Insert to the rb-tree */
605
+ rb_link_node(&va->rb_node, parent, link);
606
+ if (root == &free_vmap_area_root) {
607
+ /*
608
+ * Some explanation here. Just perform simple insertion
609
+ * to the tree. We do not set va->subtree_max_size to
610
+ * its current size before calling rb_insert_augmented().
611
+ * It is because of we populate the tree from the bottom
612
+ * to parent levels when the node _is_ in the tree.
613
+ *
614
+ * Therefore we set subtree_max_size to zero after insertion,
615
+ * to let __augment_tree_propagate_from() puts everything to
616
+ * the correct order later on.
617
+ */
618
+ rb_insert_augmented(&va->rb_node,
619
+ root, &free_vmap_area_rb_augment_cb);
620
+ va->subtree_max_size = 0;
621
+ } else {
622
+ rb_insert_color(&va->rb_node, root);
623
+ }
624
+
625
+ /* Address-sort this list */
626
+ list_add(&va->list, head);
627
+}
628
+
629
+static __always_inline void
630
+unlink_va(struct vmap_area *va, struct rb_root *root)
631
+{
632
+ if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
633
+ return;
634
+
635
+ if (root == &free_vmap_area_root)
636
+ rb_erase_augmented(&va->rb_node,
637
+ root, &free_vmap_area_rb_augment_cb);
638
+ else
639
+ rb_erase(&va->rb_node, root);
640
+
641
+ list_del(&va->list);
642
+ RB_CLEAR_NODE(&va->rb_node);
643
+}
644
+
645
+#if DEBUG_AUGMENT_PROPAGATE_CHECK
646
+static void
647
+augment_tree_propagate_check(void)
648
+{
649
+ struct vmap_area *va;
650
+ unsigned long computed_size;
651
+
652
+ list_for_each_entry(va, &free_vmap_area_list, list) {
653
+ computed_size = compute_subtree_max_size(va);
654
+ if (computed_size != va->subtree_max_size)
655
+ pr_emerg("tree is corrupted: %lu, %lu\n",
656
+ va_size(va), va->subtree_max_size);
657
+ }
658
+}
659
+#endif
660
+
661
+/*
662
+ * This function populates subtree_max_size from bottom to upper
663
+ * levels starting from VA point. The propagation must be done
664
+ * when VA size is modified by changing its va_start/va_end. Or
665
+ * in case of newly inserting of VA to the tree.
666
+ *
667
+ * It means that __augment_tree_propagate_from() must be called:
668
+ * - After VA has been inserted to the tree(free path);
669
+ * - After VA has been shrunk(allocation path);
670
+ * - After VA has been increased(merging path).
671
+ *
672
+ * Please note that, it does not mean that upper parent nodes
673
+ * and their subtree_max_size are recalculated all the time up
674
+ * to the root node.
675
+ *
676
+ *        4--8
+ *         /\
+ *        /  \
+ *       /    \
+ *      2--2  8--8
+ *
+ * For example, if we modify node 4, shrinking it to 2, then
+ * no modification is required. If we shrink node 2 to 1,
+ * only its subtree_max_size is updated and set to 1. If we shrink
+ * node 8 to 6, then its subtree_max_size is set to 6 and the parent
+ * node becomes 4--6.
687
+ */
688
+static __always_inline void
689
+augment_tree_propagate_from(struct vmap_area *va)
690
+{
691
+ /*
692
+ * Populate the tree from bottom towards the root until
693
+ * the calculated maximum available size of checked node
694
+ * is equal to its current one.
695
+ */
696
+ free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);
697
+
698
+#if DEBUG_AUGMENT_PROPAGATE_CHECK
699
+ augment_tree_propagate_check();
700
+#endif
701
+}
702
+
703
+static void
704
+insert_vmap_area(struct vmap_area *va,
705
+ struct rb_root *root, struct list_head *head)
706
+{
707
+ struct rb_node **link;
708
+ struct rb_node *parent;
709
+
710
+ link = find_va_links(va, root, NULL, &parent);
711
+ if (link)
712
+ link_va(va, root, parent, link, head);
713
+}
714
+
715
+static void
716
+insert_vmap_area_augment(struct vmap_area *va,
717
+ struct rb_node *from, struct rb_root *root,
718
+ struct list_head *head)
719
+{
720
+ struct rb_node **link;
721
+ struct rb_node *parent;
722
+
723
+ if (from)
724
+ link = find_va_links(va, NULL, from, &parent);
725
+ else
726
+ link = find_va_links(va, root, NULL, &parent);
727
+
728
+ if (link) {
729
+ link_va(va, root, parent, link, head);
730
+ augment_tree_propagate_from(va);
731
+ }
732
+}
733
+
734
+/*
735
+ * Merge de-allocated chunk of VA memory with previous
736
+ * and next free blocks. If coalesce is not done a new
737
+ * free area is inserted. If VA has been merged, it is
738
+ * freed.
739
+ *
740
+ * Please note, it can return NULL in case of overlap
741
+ * ranges, followed by WARN() report. Despite it is a
742
+ * buggy behaviour, a system can be alive and keep
743
+ * ongoing.
744
+ */
745
+static __always_inline struct vmap_area *
746
+merge_or_add_vmap_area(struct vmap_area *va,
747
+ struct rb_root *root, struct list_head *head)
748
+{
749
+ struct vmap_area *sibling;
750
+ struct list_head *next;
751
+ struct rb_node **link;
752
+ struct rb_node *parent;
753
+ bool merged = false;
754
+
755
+ /*
756
+ * Find a place in the tree where VA potentially will be
757
+ * inserted, unless it is merged with its sibling/siblings.
758
+ */
759
+ link = find_va_links(va, root, NULL, &parent);
760
+ if (!link)
761
+ return NULL;
762
+
763
+ /*
764
+ * Get next node of VA to check if merging can be done.
765
+ */
766
+ next = get_va_next_sibling(parent, link);
767
+ if (unlikely(next == NULL))
768
+ goto insert;
769
+
770
+ /*
771
+ * start end
772
+ * | |
773
+ * |<------VA------>|<-----Next----->|
774
+ * | |
775
+ * start end
776
+ */
777
+ if (next != head) {
778
+ sibling = list_entry(next, struct vmap_area, list);
779
+ if (sibling->va_start == va->va_end) {
780
+ sibling->va_start = va->va_start;
781
+
782
+ /* Free vmap_area object. */
783
+ kmem_cache_free(vmap_area_cachep, va);
784
+
785
+ /* Point to the new merged area. */
786
+ va = sibling;
787
+ merged = true;
788
+ }
789
+ }
790
+
791
+ /*
792
+ * start end
793
+ * | |
794
+ * |<-----Prev----->|<------VA------>|
795
+ * | |
796
+ * start end
797
+ */
798
+ if (next->prev != head) {
799
+ sibling = list_entry(next->prev, struct vmap_area, list);
800
+ if (sibling->va_end == va->va_start) {
801
+ /*
802
+ * If both neighbors are coalesced, it is important
803
+ * to unlink the "next" node first, followed by merging
804
+ * with "previous" one. Otherwise the tree might not be
805
+ * fully populated if a sibling's augmented value is
806
+ * "normalized" because of rotation operations.
807
+ */
808
+ if (merged)
809
+ unlink_va(va, root);
810
+
811
+ sibling->va_end = va->va_end;
812
+
813
+ /* Free vmap_area object. */
814
+ kmem_cache_free(vmap_area_cachep, va);
815
+
816
+ /* Point to the new merged area. */
817
+ va = sibling;
818
+ merged = true;
819
+ }
820
+ }
821
+
822
+insert:
823
+ if (!merged)
824
+ link_va(va, root, parent, link, head);
825
+
826
+ /*
827
+ * Last step is to check and update the tree.
828
+ */
829
+ augment_tree_propagate_from(va);
830
+ return va;
831
+}
832
+
833
+static __always_inline bool
834
+is_within_this_va(struct vmap_area *va, unsigned long size,
835
+ unsigned long align, unsigned long vstart)
836
+{
837
+ unsigned long nva_start_addr;
838
+
839
+ if (va->va_start > vstart)
840
+ nva_start_addr = ALIGN(va->va_start, align);
841
+ else
842
+ nva_start_addr = ALIGN(vstart, align);
843
+
844
+ /* Can be overflowed due to big size or alignment. */
845
+ if (nva_start_addr + size < nva_start_addr ||
846
+ nva_start_addr < vstart)
847
+ return false;
848
+
849
+ return (nva_start_addr + size <= va->va_end);
850
+}
851
+
852
+/*
853
+ * Find the first free block(lowest start address) in the tree,
854
+ * that will accomplish the request corresponding to passing
855
+ * parameters.
856
+ */
857
+static __always_inline struct vmap_area *
858
+find_vmap_lowest_match(unsigned long size,
859
+ unsigned long align, unsigned long vstart)
860
+{
861
+ struct vmap_area *va;
862
+ struct rb_node *node;
863
+ unsigned long length;
864
+
865
+ /* Start from the root. */
866
+ node = free_vmap_area_root.rb_node;
867
+
868
+ /* Adjust the search size for alignment overhead. */
869
+ length = size + align - 1;
870
+
871
+ while (node) {
872
+ va = rb_entry(node, struct vmap_area, rb_node);
873
+
874
+ if (get_subtree_max_size(node->rb_left) >= length &&
875
+ vstart < va->va_start) {
876
+ node = node->rb_left;
877
+ } else {
878
+ if (is_within_this_va(va, size, align, vstart))
879
+ return va;
880
+
881
+ /*
882
+ * Does not make sense to go deeper towards the right
883
+ * sub-tree if it does not have a free block that is
884
+ * equal or bigger to the requested search length.
885
+ */
886
+ if (get_subtree_max_size(node->rb_right) >= length) {
887
+ node = node->rb_right;
888
+ continue;
889
+ }
890
+
891
+ /*
892
+ * OK. We roll back and find the first right sub-tree,
893
+ * that will satisfy the search criteria. It can happen
894
+ * only once due to "vstart" restriction.
895
+ */
896
+ while ((node = rb_parent(node))) {
897
+ va = rb_entry(node, struct vmap_area, rb_node);
898
+ if (is_within_this_va(va, size, align, vstart))
899
+ return va;
900
+
901
+ if (get_subtree_max_size(node->rb_right) >= length &&
902
+ vstart <= va->va_start) {
903
+ node = node->rb_right;
904
+ break;
905
+ }
906
+ }
907
+ }
908
+ }
909
+
910
+ return NULL;
911
+}
912
+
913
+#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
914
+#include <linux/random.h>
915
+
916
+static struct vmap_area *
917
+find_vmap_lowest_linear_match(unsigned long size,
918
+ unsigned long align, unsigned long vstart)
919
+{
920
+ struct vmap_area *va;
921
+
922
+ list_for_each_entry(va, &free_vmap_area_list, list) {
923
+ if (!is_within_this_va(va, size, align, vstart))
924
+ continue;
925
+
926
+ return va;
927
+ }
928
+
929
+ return NULL;
930
+}
931
+
932
+static void
933
+find_vmap_lowest_match_check(unsigned long size)
934
+{
935
+ struct vmap_area *va_1, *va_2;
936
+ unsigned long vstart;
937
+ unsigned int rnd;
938
+
939
+ get_random_bytes(&rnd, sizeof(rnd));
940
+ vstart = VMALLOC_START + rnd;
941
+
942
+ va_1 = find_vmap_lowest_match(size, 1, vstart);
943
+ va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
944
+
945
+ if (va_1 != va_2)
946
+ pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
947
+ va_1, va_2, vstart);
948
+}
949
+#endif
950
+
951
+enum fit_type {
952
+ NOTHING_FIT = 0,
953
+ FL_FIT_TYPE = 1, /* full fit */
954
+ LE_FIT_TYPE = 2, /* left edge fit */
955
+ RE_FIT_TYPE = 3, /* right edge fit */
956
+ NE_FIT_TYPE = 4 /* no edge fit */
957
+};
958
+
959
+static __always_inline enum fit_type
960
+classify_va_fit_type(struct vmap_area *va,
961
+ unsigned long nva_start_addr, unsigned long size)
962
+{
963
+ enum fit_type type;
964
+
965
+ /* Check if it is within VA. */
966
+ if (nva_start_addr < va->va_start ||
967
+ nva_start_addr + size > va->va_end)
968
+ return NOTHING_FIT;
969
+
970
+ /* Now classify. */
971
+ if (va->va_start == nva_start_addr) {
972
+ if (va->va_end == nva_start_addr + size)
973
+ type = FL_FIT_TYPE;
974
+ else
975
+ type = LE_FIT_TYPE;
976
+ } else if (va->va_end == nva_start_addr + size) {
977
+ type = RE_FIT_TYPE;
978
+ } else {
979
+ type = NE_FIT_TYPE;
980
+ }
981
+
982
+ return type;
983
+}
984
+
985
+static __always_inline int
986
+adjust_va_to_fit_type(struct vmap_area *va,
987
+ unsigned long nva_start_addr, unsigned long size,
988
+ enum fit_type type)
989
+{
990
+ struct vmap_area *lva = NULL;
991
+
992
+ if (type == FL_FIT_TYPE) {
993
+ /*
994
+ * No need to split VA, it fully fits.
995
+ *
996
+ * | |
997
+ * V NVA V
998
+ * |---------------|
999
+ */
1000
+ unlink_va(va, &free_vmap_area_root);
1001
+ kmem_cache_free(vmap_area_cachep, va);
1002
+ } else if (type == LE_FIT_TYPE) {
1003
+ /*
1004
+ * Split left edge of fit VA.
1005
+ *
1006
+ * | |
1007
+ * V NVA V R
1008
+ * |-------|-------|
1009
+ */
1010
+ va->va_start += size;
1011
+ } else if (type == RE_FIT_TYPE) {
1012
+ /*
1013
+ * Split right edge of fit VA.
1014
+ *
1015
+ * | |
1016
+ * L V NVA V
1017
+ * |-------|-------|
1018
+ */
1019
+ va->va_end = nva_start_addr;
1020
+ } else if (type == NE_FIT_TYPE) {
1021
+ /*
1022
+ * Split no edge of fit VA.
1023
+ *
1024
+ * | |
1025
+ * L V NVA V R
1026
+ * |---|-------|---|
1027
+ */
1028
+ lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
1029
+ if (unlikely(!lva)) {
1030
+ /*
1031
+ * For percpu allocator we do not do any pre-allocation
1032
+ * and leave it as it is. The reason is it most likely
1033
+ * never ends up with NE_FIT_TYPE splitting. In case of
1034
+ * percpu allocations offsets and sizes are aligned to
1035
+ * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
1036
+ * are its main fitting cases.
1037
+ *
1038
+ * There are a few exceptions though, as an example it is
1039
+ * a first allocation (early boot up) when we have "one"
1040
+ * big free space that has to be split.
1041
+ *
1042
+ * Also we can hit this path in case of regular "vmap"
1043
+ * allocations, if "this" current CPU was not preloaded.
1044
+ * See the comment in alloc_vmap_area() why. If so, then
1045
+ * GFP_NOWAIT is used instead to get an extra object for
1046
+ * split purpose. That is rare and most time does not
1047
+ * occur.
1048
+ *
1049
+ * What happens if an allocation gets failed. Basically,
1050
+ * an "overflow" path is triggered to purge lazily freed
1051
+ * areas to free some memory, then, the "retry" path is
1052
+ * triggered to repeat one more time. See more details
1053
+ * in alloc_vmap_area() function.
1054
+ */
1055
+ lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
1056
+ if (!lva)
1057
+ return -1;
1058
+ }
1059
+
1060
+ /*
1061
+ * Build the remainder.
1062
+ */
1063
+ lva->va_start = va->va_start;
1064
+ lva->va_end = nva_start_addr;
1065
+
1066
+ /*
1067
+ * Shrink this VA to remaining size.
1068
+ */
1069
+ va->va_start = nva_start_addr + size;
1070
+ } else {
1071
+ return -1;
1072
+ }
1073
+
1074
+ if (type != FL_FIT_TYPE) {
1075
+ augment_tree_propagate_from(va);
1076
+
1077
+ if (lva) /* type == NE_FIT_TYPE */
1078
+ insert_vmap_area_augment(lva, &va->rb_node,
1079
+ &free_vmap_area_root, &free_vmap_area_list);
1080
+ }
1081
+
1082
+ return 0;
1083
+}
1084
+
1085
+/*
1086
+ * Returns a start address of the newly allocated area, if success.
1087
+ * Otherwise a vend is returned that indicates failure.
1088
+ */
1089
+static __always_inline unsigned long
1090
+__alloc_vmap_area(unsigned long size, unsigned long align,
1091
+ unsigned long vstart, unsigned long vend)
1092
+{
1093
+ unsigned long nva_start_addr;
1094
+ struct vmap_area *va;
1095
+ enum fit_type type;
1096
+ int ret;
1097
+
1098
+ va = find_vmap_lowest_match(size, align, vstart);
1099
+ if (unlikely(!va))
1100
+ return vend;
1101
+
1102
+ if (va->va_start > vstart)
1103
+ nva_start_addr = ALIGN(va->va_start, align);
1104
+ else
1105
+ nva_start_addr = ALIGN(vstart, align);
1106
+
1107
+ /* Check the "vend" restriction. */
1108
+ if (nva_start_addr + size > vend)
1109
+ return vend;
1110
+
1111
+ /* Classify what we have found. */
1112
+ type = classify_va_fit_type(va, nva_start_addr, size);
1113
+ if (WARN_ON_ONCE(type == NOTHING_FIT))
1114
+ return vend;
1115
+
1116
+ /* Update the free vmap_area. */
1117
+ ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
1118
+ if (ret)
1119
+ return vend;
1120
+
1121
+#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
1122
+ find_vmap_lowest_match_check(size);
1123
+#endif
1124
+
1125
+ return nva_start_addr;
1126
+}
1127
+
1128
+/*
1129
+ * Free a region of KVA allocated by alloc_vmap_area
1130
+ */
1131
+static void free_vmap_area(struct vmap_area *va)
1132
+{
1133
+ /*
1134
+ * Remove from the busy tree/list.
1135
+ */
1136
+ spin_lock(&vmap_area_lock);
1137
+ unlink_va(va, &vmap_area_root);
1138
+ spin_unlock(&vmap_area_lock);
1139
+
1140
+ /*
1141
+ * Insert/Merge it back to the free tree/list.
1142
+ */
1143
+ spin_lock(&free_vmap_area_lock);
1144
+ merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
1145
+ spin_unlock(&free_vmap_area_lock);
1146
+}
4051147
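A worked illustration of the four fit types used above (hypothetical
addresses): take a free block [0x1000, 0x9000) and a request placed at
nva_start_addr.

/*
 *   request [0x1000, 0x9000) -> FL_FIT_TYPE: the block is consumed whole.
 *   request [0x1000, 0x3000) -> LE_FIT_TYPE: the block shrinks to [0x3000, 0x9000).
 *   request [0x7000, 0x9000) -> RE_FIT_TYPE: the block shrinks to [0x1000, 0x7000).
 *   request [0x3000, 0x5000) -> NE_FIT_TYPE: the block splits into
 *                               [0x1000, 0x3000) and [0x5000, 0x9000), which is
 *                               the case that needs the extra vmap_area object
 *                               from the per-CPU preload.
 */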
4061148 /*
4071149 * Allocate a region of KVA of the specified size and alignment, within the
@@ -412,20 +1154,22 @@
4121154 unsigned long vstart, unsigned long vend,
4131155 int node, gfp_t gfp_mask)
4141156 {
415
- struct vmap_area *va;
416
- struct rb_node *n;
1157
+ struct vmap_area *va, *pva;
4171158 unsigned long addr;
4181159 int purged = 0;
419
- struct vmap_area *first;
1160
+ int ret;
4201161
4211162 BUG_ON(!size);
4221163 BUG_ON(offset_in_page(size));
4231164 BUG_ON(!is_power_of_2(align));
4241165
425
- might_sleep();
1166
+ if (unlikely(!vmap_initialized))
1167
+ return ERR_PTR(-EBUSY);
4261168
427
- va = kmalloc_node(sizeof(struct vmap_area),
428
- gfp_mask & GFP_RECLAIM_MASK, node);
1169
+ might_sleep();
1170
+ gfp_mask = gfp_mask & GFP_RECLAIM_MASK;
1171
+
1172
+ va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
4291173 if (unlikely(!va))
4301174 return ERR_PTR(-ENOMEM);
4311175
@@ -433,101 +1177,71 @@
4331177 * Only scan the relevant parts containing pointers to other objects
4341178 * to avoid false negatives.
4351179 */
436
- kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
1180
+ kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
4371181
4381182 retry:
439
- spin_lock(&vmap_area_lock);
4401183 /*
441
- * Invalidate cache if we have more permissive parameters.
442
- * cached_hole_size notes the largest hole noticed _below_
443
- * the vmap_area cached in free_vmap_cache: if size fits
444
- * into that hole, we want to scan from vstart to reuse
445
- * the hole instead of allocating above free_vmap_cache.
446
- * Note that __free_vmap_area may update free_vmap_cache
447
- * without updating cached_hole_size or cached_align.
1184
+ * Preload this CPU with one extra vmap_area object. It is used
1185
+ * when fit type of free area is NE_FIT_TYPE. Please note, it
1186
+ * does not guarantee that an allocation occurs on a CPU that
1187
+ * is preloaded, instead we minimize the case when it is not.
1188
+ * It can happen because of cpu migration, because there is a
1189
+ * race until the below spinlock is taken.
1190
+ *
1191
+ * The preload is done in non-atomic context, thus it allows us
1192
+ * to use more permissive allocation masks to be more stable under
1193
+ * low memory condition and high memory pressure. In rare case,
1194
+ * if not preloaded, GFP_NOWAIT is used.
1195
+ *
1196
+ * Set "pva" to NULL here, because of "retry" path.
4481197 */
449
- if (!free_vmap_cache ||
450
- size < cached_hole_size ||
451
- vstart < cached_vstart ||
452
- align < cached_align) {
453
-nocache:
454
- cached_hole_size = 0;
455
- free_vmap_cache = NULL;
456
- }
457
- /* record if we encounter less permissive parameters */
458
- cached_vstart = vstart;
459
- cached_align = align;
1198
+ pva = NULL;
4601199
461
- /* find starting point for our search */
462
- if (free_vmap_cache) {
463
- first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
464
- addr = ALIGN(first->va_end, align);
465
- if (addr < vstart)
466
- goto nocache;
467
- if (addr + size < addr)
468
- goto overflow;
1200
+ if (!this_cpu_read(ne_fit_preload_node))
1201
+ /*
1202
+ * Even if it fails we do not really care about that.
1203
+ * Just proceed as it is. If needed "overflow" path
1204
+ * will refill the cache we allocate from.
1205
+ */
1206
+ pva = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
4691207
470
- } else {
471
- addr = ALIGN(vstart, align);
472
- if (addr + size < addr)
473
- goto overflow;
1208
+ spin_lock(&free_vmap_area_lock);
4741209
475
- n = vmap_area_root.rb_node;
476
- first = NULL;
1210
+ if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva))
1211
+ kmem_cache_free(vmap_area_cachep, pva);
4771212
478
- while (n) {
479
- struct vmap_area *tmp;
480
- tmp = rb_entry(n, struct vmap_area, rb_node);
481
- if (tmp->va_end >= addr) {
482
- first = tmp;
483
- if (tmp->va_start <= addr)
484
- break;
485
- n = n->rb_left;
486
- } else
487
- n = n->rb_right;
488
- }
489
-
490
- if (!first)
491
- goto found;
492
- }
493
-
494
- /* from the starting point, walk areas until a suitable hole is found */
495
- while (addr + size > first->va_start && addr + size <= vend) {
496
- if (addr + cached_hole_size < first->va_start)
497
- cached_hole_size = first->va_start - addr;
498
- addr = ALIGN(first->va_end, align);
499
- if (addr + size < addr)
500
- goto overflow;
501
-
502
- if (list_is_last(&first->list, &vmap_area_list))
503
- goto found;
504
-
505
- first = list_next_entry(first, list);
506
- }
507
-
508
-found:
5091213 /*
510
- * Check also calculated address against the vstart,
511
- * because it can be 0 because of big align request.
1214
+ * If an allocation fails, the "vend" address is
1215
+ * returned. Therefore trigger the overflow path.
5121216 */
513
- if (addr + size > vend || addr < vstart)
1217
+ addr = __alloc_vmap_area(size, align, vstart, vend);
1218
+ spin_unlock(&free_vmap_area_lock);
1219
+
1220
+ if (unlikely(addr == vend))
5141221 goto overflow;
5151222
5161223 va->va_start = addr;
5171224 va->va_end = addr + size;
518
- va->flags = 0;
519
- __insert_vmap_area(va);
520
- free_vmap_cache = &va->rb_node;
1225
+ va->vm = NULL;
1226
+
1227
+
1228
+ spin_lock(&vmap_area_lock);
1229
+ insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
5211230 spin_unlock(&vmap_area_lock);
5221231
5231232 BUG_ON(!IS_ALIGNED(va->va_start, align));
5241233 BUG_ON(va->va_start < vstart);
5251234 BUG_ON(va->va_end > vend);
5261235
1236
+ ret = kasan_populate_vmalloc(addr, size);
1237
+ if (ret) {
1238
+ free_vmap_area(va);
1239
+ return ERR_PTR(ret);
1240
+ }
1241
+
5271242 return va;
5281243
5291244 overflow:
530
- spin_unlock(&vmap_area_lock);
5311245 if (!purged) {
5321246 purge_vmap_area_lazy();
5331247 purged = 1;
@@ -546,7 +1260,8 @@
         if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
                 pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
                         size);
-        kfree(va);
+
+        kmem_cache_free(vmap_area_cachep, va);
         return ERR_PTR(-EBUSY);
 }
 
@@ -562,59 +1277,7 @@
5621277 }
5631278 EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
5641279
565
-static void __free_vmap_area(struct vmap_area *va)
566
-{
567
- BUG_ON(RB_EMPTY_NODE(&va->rb_node));
568
-
569
- if (free_vmap_cache) {
570
- if (va->va_end < cached_vstart) {
571
- free_vmap_cache = NULL;
572
- } else {
573
- struct vmap_area *cache;
574
- cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
575
- if (va->va_start <= cache->va_start) {
576
- free_vmap_cache = rb_prev(&va->rb_node);
577
- /*
578
- * We don't try to update cached_hole_size or
579
- * cached_align, but it won't go very wrong.
580
- */
581
- }
582
- }
583
- }
584
- rb_erase(&va->rb_node, &vmap_area_root);
585
- RB_CLEAR_NODE(&va->rb_node);
586
- list_del_rcu(&va->list);
587
-
588
- /*
589
- * Track the highest possible candidate for pcpu area
590
- * allocation. Areas outside of vmalloc area can be returned
591
- * here too, consider only end addresses which fall inside
592
- * vmalloc area proper.
593
- */
594
- if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
595
- vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
596
-
597
- kfree_rcu(va, rcu_head);
598
-}
599
-
600
-/*
601
- * Free a region of KVA allocated by alloc_vmap_area
602
- */
603
-static void free_vmap_area(struct vmap_area *va)
604
-{
605
- spin_lock(&vmap_area_lock);
606
- __free_vmap_area(va);
607
- spin_unlock(&vmap_area_lock);
608
-}
609
-
610
-/*
611
- * Clear the pagetable entries of a given vmap_area
612
- */
613
-static void unmap_vmap_area(struct vmap_area *va)
614
-{
615
- vunmap_page_range(va->va_start, va->va_end);
616
-}
617
-
1280
+bool lazy_vunmap_enable __read_mostly = true;
6181281 /*
6191282 * lazy_max_pages is the maximum amount of virtual address space we gather up
6201283 * before attempting to purge with a TLB flush.
@@ -635,12 +1298,15 @@
6351298 {
6361299 unsigned int log;
6371300
1301
+ if (!lazy_vunmap_enable)
1302
+ return 0;
1303
+
6381304 log = fls(num_online_cpus());
6391305
6401306 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
6411307 }
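For a concrete feel (assuming 4 KiB pages): on an 8-CPU system fls(8) = 4, so
lazy_max_pages() permits about 4 * 32 MiB = 128 MiB of lazily freed address
space (32768 pages) to accumulate before a purge, while clearing
lazy_vunmap_enable makes it return 0 and disables the batching entirely.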
6421308
643
-static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
1309
+static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
6441310
6451311 /*
6461312 * Serialize vmap purging. There is no actual criticial section protected
@@ -658,7 +1324,7 @@
  */
 void set_iounmap_nonlazy(void)
 {
-        atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
+        atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
 }
 
 /*
@@ -666,36 +1332,58 @@
6661332 */
6671333 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
6681334 {
1335
+ unsigned long resched_threshold;
6691336 struct llist_node *valist;
6701337 struct vmap_area *va;
6711338 struct vmap_area *n_va;
672
- bool do_free = false;
6731339
6741340 lockdep_assert_held(&vmap_purge_lock);
6751341
6761342 valist = llist_del_all(&vmap_purge_list);
1343
+ if (unlikely(valist == NULL))
1344
+ return false;
1345
+
1346
+ /*
1347
+ * TODO: to calculate a flush range without looping.
1348
+ * The list can be up to lazy_max_pages() elements.
1349
+ */
6771350 llist_for_each_entry(va, valist, purge_list) {
6781351 if (va->va_start < start)
6791352 start = va->va_start;
6801353 if (va->va_end > end)
6811354 end = va->va_end;
682
- do_free = true;
6831355 }
684
-
685
- if (!do_free)
686
- return false;
6871356
6881357 flush_tlb_kernel_range(start, end);
1358
+ resched_threshold = lazy_max_pages() << 1;
6891359
690
- spin_lock(&vmap_area_lock);
1360
+ spin_lock(&free_vmap_area_lock);
6911361 llist_for_each_entry_safe(va, n_va, valist, purge_list) {
692
- int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
1362
+ unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
1363
+ unsigned long orig_start = va->va_start;
1364
+ unsigned long orig_end = va->va_end;
6931365
694
- __free_vmap_area(va);
695
- atomic_sub(nr, &vmap_lazy_nr);
696
- cond_resched_lock(&vmap_area_lock);
1366
+ /*
1367
+ * Finally insert or merge lazily-freed area. It is
1368
+ * detached and there is no need to "unlink" it from
1369
+ * anything.
1370
+ */
1371
+ va = merge_or_add_vmap_area(va, &free_vmap_area_root,
1372
+ &free_vmap_area_list);
1373
+
1374
+ if (!va)
1375
+ continue;
1376
+
1377
+ if (is_vmalloc_or_module_addr((void *)orig_start))
1378
+ kasan_release_vmalloc(orig_start, orig_end,
1379
+ va->va_start, va->va_end);
1380
+
1381
+ atomic_long_sub(nr, &vmap_lazy_nr);
1382
+
1383
+ if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
1384
+ cond_resched_lock(&free_vmap_area_lock);
6971385 }
698
- spin_unlock(&vmap_area_lock);
1386
+ spin_unlock(&free_vmap_area_lock);
6991387 return true;
7001388 }
7011389
@@ -729,10 +1417,14 @@
7291417 */
7301418 static void free_vmap_area_noflush(struct vmap_area *va)
7311419 {
732
- int nr_lazy;
1420
+ unsigned long nr_lazy;
7331421
734
- nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
735
- &vmap_lazy_nr);
1422
+ spin_lock(&vmap_area_lock);
1423
+ unlink_va(va, &vmap_area_root);
1424
+ spin_unlock(&vmap_area_lock);
1425
+
1426
+ nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
1427
+ PAGE_SHIFT, &vmap_lazy_nr);
7361428
7371429 /* After this point, we may free va at any time */
7381430 llist_add(&va->purge_list, &vmap_purge_list);
@@ -747,8 +1439,8 @@
7471439 static void free_unmap_vmap_area(struct vmap_area *va)
7481440 {
7491441 flush_cache_vunmap(va->va_start, va->va_end);
750
- unmap_vmap_area(va);
751
- if (debug_pagealloc_enabled())
1442
+ unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start);
1443
+ if (debug_pagealloc_enabled_static())
7521444 flush_tlb_kernel_range(va->va_start, va->va_end);
7531445
7541446 free_vmap_area_noflush(va);
@@ -795,8 +1487,6 @@
7951487
7961488 #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
7971489
798
-static bool vmap_initialized __read_mostly = false;
799
-
8001490 struct vmap_block_queue {
8011491 spinlock_t lock;
8021492 struct list_head free;
@@ -816,12 +1506,11 @@
8161506 static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
8171507
8181508 /*
819
- * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
1509
+ * XArray of vmap blocks, indexed by address, to quickly find a vmap block
8201510 * in the free path. Could get rid of this if we change the API to return a
8211511 * "cookie" from alloc, to be passed to free. But no big deal yet.
8221512 */
823
-static DEFINE_SPINLOCK(vmap_block_tree_lock);
824
-static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
1513
+static DEFINE_XARRAY(vmap_blocks);
8251514
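The XArray that replaces the radix tree does its own internal locking, which is
why vmap_block_tree_lock disappears. A hedged sketch of the three operations
the rest of this patch performs on vmap_blocks (illustrative only):

static int example_xarray_usage(struct xarray *xa, unsigned long idx, void *ptr)
{
        int err;

        err = xa_insert(xa, idx, ptr, GFP_KERNEL); /* -EBUSY if slot is taken */
        if (err)
                return err;

        WARN_ON(xa_load(xa, idx) != ptr);  /* lookup, no external lock needed */
        WARN_ON(xa_erase(xa, idx) != ptr); /* removal returns the old entry */
        return 0;
}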
8261515 /*
8271516 * We should probably have a fallback mechanism to allocate virtual memory
@@ -852,7 +1541,7 @@
8521541 * @order: how many 2^order pages should be occupied in newly allocated block
8531542 * @gfp_mask: flags for the page level allocator
8541543 *
855
- * Returns: virtual address in a newly allocated block or ERR_PTR(-errno)
1544
+ * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
8561545 */
8571546 static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
8581547 {
@@ -878,13 +1567,6 @@
8781567 return ERR_CAST(va);
8791568 }
8801569
881
- err = radix_tree_preload(gfp_mask);
882
- if (unlikely(err)) {
883
- kfree(vb);
884
- free_vmap_area(va);
885
- return ERR_PTR(err);
886
- }
887
-
8881570 vaddr = vmap_block_vaddr(va->va_start, 0);
8891571 spin_lock_init(&vb->lock);
8901572 vb->va = va;
@@ -897,11 +1579,12 @@
8971579 INIT_LIST_HEAD(&vb->free_list);
8981580
8991581 vb_idx = addr_to_vb_idx(va->va_start);
900
- spin_lock(&vmap_block_tree_lock);
901
- err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
902
- spin_unlock(&vmap_block_tree_lock);
903
- BUG_ON(err);
904
- radix_tree_preload_end();
1582
+ err = xa_insert(&vmap_blocks, vb_idx, vb, gfp_mask);
1583
+ if (err) {
1584
+ kfree(vb);
1585
+ free_vmap_area(va);
1586
+ return ERR_PTR(err);
1587
+ }
9051588
9061589 vbq = &get_cpu_var(vmap_block_queue);
9071590 spin_lock(&vbq->lock);
@@ -915,12 +1598,8 @@
9151598 static void free_vmap_block(struct vmap_block *vb)
9161599 {
9171600 struct vmap_block *tmp;
918
- unsigned long vb_idx;
9191601
920
- vb_idx = addr_to_vb_idx(vb->va->va_start);
921
- spin_lock(&vmap_block_tree_lock);
922
- tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
923
- spin_unlock(&vmap_block_tree_lock);
1602
+ tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
9241603 BUG_ON(tmp != vb);
9251604
9261605 free_vmap_area_noflush(vb->va);
@@ -1023,34 +1702,25 @@
10231702 return vaddr;
10241703 }
10251704
1026
-static void vb_free(const void *addr, unsigned long size)
1705
+static void vb_free(unsigned long addr, unsigned long size)
10271706 {
10281707 unsigned long offset;
1029
- unsigned long vb_idx;
10301708 unsigned int order;
10311709 struct vmap_block *vb;
10321710
10331711 BUG_ON(offset_in_page(size));
10341712 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
10351713
1036
- flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
1714
+ flush_cache_vunmap(addr, addr + size);
10371715
10381716 order = get_order(size);
1717
+ offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
1718
+ vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr));
10391719
1040
- offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
1041
- offset >>= PAGE_SHIFT;
1720
+ unmap_kernel_range_noflush(addr, size);
10421721
1043
- vb_idx = addr_to_vb_idx((unsigned long)addr);
1044
- rcu_read_lock();
1045
- vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
1046
- rcu_read_unlock();
1047
- BUG_ON(!vb);
1048
-
1049
- vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
1050
-
1051
- if (debug_pagealloc_enabled())
1052
- flush_tlb_kernel_range((unsigned long)addr,
1053
- (unsigned long)addr + size);
1722
+ if (debug_pagealloc_enabled_static())
1723
+ flush_tlb_kernel_range(addr, addr + size);
10541724
10551725 spin_lock(&vb->lock);
10561726
@@ -1067,24 +1737,9 @@
10671737 spin_unlock(&vb->lock);
10681738 }
10691739
1070
-/**
1071
- * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
1072
- *
1073
- * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
1074
- * to amortize TLB flushing overheads. What this means is that any page you
1075
- * have now, may, in a former life, have been mapped into kernel virtual
1076
- * address by the vmap layer and so there might be some CPUs with TLB entries
1077
- * still referencing that page (additional to the regular 1:1 kernel mapping).
1078
- *
1079
- * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
1080
- * be sure that none of the pages we have control over will have any aliases
1081
- * from the vmap layer.
1082
- */
1083
-void vm_unmap_aliases(void)
1740
+static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
10841741 {
1085
- unsigned long start = ULONG_MAX, end = 0;
10861742 int cpu;
1087
- int flush = 0;
10881743
10891744 if (unlikely(!vmap_initialized))
10901745 return;
@@ -1098,7 +1753,7 @@
10981753 rcu_read_lock();
10991754 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
11001755 spin_lock(&vb->lock);
1101
- if (vb->dirty) {
1756
+ if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) {
11021757 unsigned long va_start = vb->va->va_start;
11031758 unsigned long s, e;
11041759
@@ -1121,6 +1776,27 @@
11211776 flush_tlb_kernel_range(start, end);
11221777 mutex_unlock(&vmap_purge_lock);
11231778 }
1779
+
1780
+/**
1781
+ * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
1782
+ *
1783
+ * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
1784
+ * to amortize TLB flushing overheads. What this means is that any page you
1785
+ * have now, may, in a former life, have been mapped into kernel virtual
1786
+ * address by the vmap layer and so there might be some CPUs with TLB entries
1787
+ * still referencing that page (additional to the regular 1:1 kernel mapping).
1788
+ *
1789
+ * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
1790
+ * be sure that none of the pages we have control over will have any aliases
1791
+ * from the vmap layer.
1792
+ */
1793
+void vm_unmap_aliases(void)
1794
+{
1795
+ unsigned long start = ULONG_MAX, end = 0;
1796
+ int flush = 0;
1797
+
1798
+ _vm_unmap_aliases(start, end, flush);
1799
+}
11241800 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
11251801
11261802 /**
@@ -1140,9 +1816,11 @@
11401816 BUG_ON(addr > VMALLOC_END);
11411817 BUG_ON(!PAGE_ALIGNED(addr));
11421818
1819
+ kasan_poison_vmalloc(mem, size);
1820
+
11431821 if (likely(count <= VMAP_MAX_ALLOC)) {
11441822 debug_check_no_locks_freed(mem, size);
1145
- vb_free(mem, size);
1823
+ vb_free(addr, size);
11461824 return;
11471825 }
11481826
@@ -1159,7 +1837,6 @@
11591837 * @pages: an array of pointers to the pages to be mapped
11601838 * @count: number of pages
11611839 * @node: prefer to allocate data structures on this node
1162
- * @prot: memory protection to use. PAGE_KERNEL for regular RAM
11631840 *
11641841 * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
11651842 * faster than vmap so it's good. But if you mix long-life and short-life
@@ -1169,7 +1846,7 @@
11691846 *
11701847 * Returns: a pointer to the address that has been mapped, or %NULL on failure
11711848 */
1172
-void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1849
+void *vm_map_ram(struct page **pages, unsigned int count, int node)
11731850 {
11741851 unsigned long size = (unsigned long)count << PAGE_SHIFT;
11751852 unsigned long addr;
@@ -1190,7 +1867,10 @@
11901867 addr = va->va_start;
11911868 mem = (void *)addr;
11921869 }
1193
- if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1870
+
1871
+ kasan_unpoison_vmalloc(mem, size);
1872
+
1873
+ if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) {
11941874 vm_unmap_ram(mem, count);
11951875 return NULL;
11961876 }
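A hedged usage sketch of the new vm_map_ram() signature: the pgprot_t argument
is gone and mappings are always PAGE_KERNEL (hypothetical helper, not part of
the patch):

static void *example_map_ram(struct page **pages, unsigned int count)
{
        void *mem;

        mem = vm_map_ram(pages, count, NUMA_NO_NODE);
        if (!mem)
                return NULL;

        /* ... use the mapping ... */
        return mem;     /* pair with vm_unmap_ram(mem, count) when done */
}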
@@ -1199,6 +1879,7 @@
11991879 EXPORT_SYMBOL(vm_map_ram);
12001880
12011881 static struct vm_struct *vmlist __initdata;
1882
+
12021883 /**
12031884 * vm_area_add_early - add vmap area early during boot
12041885 * @vm: vm_struct to add
@@ -1250,11 +1931,57 @@
12501931 vm_area_add_early(vm);
12511932 }
12521933
1934
+static void vmap_init_free_space(void)
1935
+{
1936
+ unsigned long vmap_start = 1;
1937
+ const unsigned long vmap_end = ULONG_MAX;
1938
+ struct vmap_area *busy, *free;
1939
+
1940
+ /*
1941
+ * B F B B B F
1942
+ * -|-----|.....|-----|-----|-----|.....|-
1943
+ * | The KVA space |
1944
+ * |<--------------------------------->|
1945
+ */
1946
+ list_for_each_entry(busy, &vmap_area_list, list) {
1947
+ if (busy->va_start - vmap_start > 0) {
1948
+ free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
1949
+ if (!WARN_ON_ONCE(!free)) {
1950
+ free->va_start = vmap_start;
1951
+ free->va_end = busy->va_start;
1952
+
1953
+ insert_vmap_area_augment(free, NULL,
1954
+ &free_vmap_area_root,
1955
+ &free_vmap_area_list);
1956
+ }
1957
+ }
1958
+
1959
+ vmap_start = busy->va_end;
1960
+ }
1961
+
1962
+ if (vmap_end - vmap_start > 0) {
1963
+ free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
1964
+ if (!WARN_ON_ONCE(!free)) {
1965
+ free->va_start = vmap_start;
1966
+ free->va_end = vmap_end;
1967
+
1968
+ insert_vmap_area_augment(free, NULL,
1969
+ &free_vmap_area_root,
1970
+ &free_vmap_area_list);
1971
+ }
1972
+ }
1973
+}
1974
+
12531975 void __init vmalloc_init(void)
12541976 {
12551977 struct vmap_area *va;
12561978 struct vm_struct *tmp;
12571979 int i;
1980
+
1981
+ /*
1982
+ * Create the cache for vmap_area objects.
1983
+ */
1984
+ vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
12581985
12591986 for_each_possible_cpu(i) {
12601987 struct vmap_block_queue *vbq;
@@ -1270,63 +1997,22 @@
12701997
12711998 /* Import existing vmlist entries. */
12721999 for (tmp = vmlist; tmp; tmp = tmp->next) {
1273
- va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1274
- va->flags = VM_VM_AREA;
2000
+ va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
2001
+ if (WARN_ON_ONCE(!va))
2002
+ continue;
2003
+
12752004 va->va_start = (unsigned long)tmp->addr;
12762005 va->va_end = va->va_start + tmp->size;
12772006 va->vm = tmp;
1278
- __insert_vmap_area(va);
2007
+ insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
12792008 }
12802009
1281
- vmap_area_pcpu_hole = VMALLOC_END;
1282
-
2010
+ /*
2011
+ * Now we can initialize a free vmap space.
2012
+ */
2013
+ vmap_init_free_space();
12832014 vmap_initialized = true;
12842015 }
1285
-
1286
-/**
1287
- * map_kernel_range_noflush - map kernel VM area with the specified pages
1288
- * @addr: start of the VM area to map
1289
- * @size: size of the VM area to map
1290
- * @prot: page protection flags to use
1291
- * @pages: pages to map
1292
- *
1293
- * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size
1294
- * specify should have been allocated using get_vm_area() and its
1295
- * friends.
1296
- *
1297
- * NOTE:
1298
- * This function does NOT do any cache flushing. The caller is
1299
- * responsible for calling flush_cache_vmap() on to-be-mapped areas
1300
- * before calling this function.
1301
- *
1302
- * RETURNS:
1303
- * The number of pages mapped on success, -errno on failure.
1304
- */
1305
-int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1306
- pgprot_t prot, struct page **pages)
1307
-{
1308
- return vmap_page_range_noflush(addr, addr + size, prot, pages);
1309
-}
1310
-
1311
-/**
1312
- * unmap_kernel_range_noflush - unmap kernel VM area
1313
- * @addr: start of the VM area to unmap
1314
- * @size: size of the VM area to unmap
1315
- *
1316
- * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size
1317
- * specify should have been allocated using get_vm_area() and its
1318
- * friends.
1319
- *
1320
- * NOTE:
1321
- * This function does NOT do any cache flushing. The caller is
1322
- * responsible for calling flush_cache_vunmap() on to-be-mapped areas
1323
- * before calling this function and flush_tlb_kernel_range() after.
1324
- */
1325
-void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1326
-{
1327
- vunmap_page_range(addr, addr + size);
1328
-}
1329
-EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
13302016
13312017 /**
13322018 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
....@@ -1341,33 +2027,26 @@
13412027 unsigned long end = addr + size;
13422028
13432029 flush_cache_vunmap(addr, end);
1344
- vunmap_page_range(addr, end);
2030
+ unmap_kernel_range_noflush(addr, size);
13452031 flush_tlb_kernel_range(addr, end);
13462032 }
1347
-EXPORT_SYMBOL_GPL(unmap_kernel_range);
13482033
1349
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
2034
+static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
2035
+ struct vmap_area *va, unsigned long flags, const void *caller)
13502036 {
1351
- unsigned long addr = (unsigned long)area->addr;
1352
- unsigned long end = addr + get_vm_area_size(area);
1353
- int err;
1354
-
1355
- err = vmap_page_range(addr, end, prot, pages);
1356
-
1357
- return err > 0 ? 0 : err;
1358
-}
1359
-EXPORT_SYMBOL_GPL(map_vm_area);
1360
-
1361
-static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1362
- unsigned long flags, const void *caller)
1363
-{
1364
- spin_lock(&vmap_area_lock);
13652037 vm->flags = flags;
13662038 vm->addr = (void *)va->va_start;
13672039 vm->size = va->va_end - va->va_start;
13682040 vm->caller = caller;
13692041 va->vm = vm;
1370
- va->flags |= VM_VM_AREA;
2042
+ trace_android_vh_save_vmalloc_stack(flags, vm);
2043
+}
2044
+
2045
+static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
2046
+ unsigned long flags, const void *caller)
2047
+{
2048
+ spin_lock(&vmap_area_lock);
2049
+ setup_vmalloc_vm_locked(vm, va, flags, caller);
13712050 spin_unlock(&vmap_area_lock);
13722051 }
13732052
....@@ -1388,6 +2067,7 @@
13882067 {
13892068 struct vmap_area *va;
13902069 struct vm_struct *area;
2070
+ unsigned long requested_size = size;
13912071
13922072 BUG_ON(in_interrupt());
13932073 size = PAGE_ALIGN(size);
....@@ -1411,18 +2091,12 @@
14112091 return NULL;
14122092 }
14132093
2094
+ kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
2095
+
14142096 setup_vmalloc_vm(area, va, flags, caller);
14152097
14162098 return area;
14172099 }
1418
-
1419
-struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1420
- unsigned long start, unsigned long end)
1421
-{
1422
- return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
1423
- GFP_KERNEL, __builtin_return_address(0));
1424
-}
1425
-EXPORT_SYMBOL_GPL(__get_vm_area);
14262100
14272101 struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
14282102 unsigned long start, unsigned long end,
....@@ -1431,15 +2105,18 @@
14312105 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
14322106 GFP_KERNEL, caller);
14332107 }
2108
+EXPORT_SYMBOL_GPL(__get_vm_area_caller);
14342109
14352110 /**
1436
- * get_vm_area - reserve a contiguous kernel virtual area
1437
- * @size: size of the area
1438
- * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
2111
+ * get_vm_area - reserve a contiguous kernel virtual area
2112
+ * @size: size of the area
2113
+ * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
14392114 *
1440
- * Search an area of @size in the kernel virtual mapping area,
1441
- * and reserved it for out purposes. Returns the area descriptor
1442
- * on success or %NULL on failure.
2115
+ * Search an area of @size in the kernel virtual mapping area,
2116
+ * and reserve it for our purposes. Returns the area descriptor
2117
+ * on success or %NULL on failure.
2118
+ *
2119
+ * Return: the area descriptor on success or %NULL on failure.
14432120 */
14442121 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
14452122 {
....@@ -1447,7 +2124,6 @@
14472124 NUMA_NO_NODE, GFP_KERNEL,
14482125 __builtin_return_address(0));
14492126 }
1450
-EXPORT_SYMBOL_GPL(get_vm_area);
14512127
14522128 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
14532129 const void *caller)
....@@ -1457,31 +2133,35 @@
14572133 }
14582134
14592135 /**
1460
- * find_vm_area - find a continuous kernel virtual area
1461
- * @addr: base address
2136
+ * find_vm_area - find a continuous kernel virtual area
2137
+ * @addr: base address
14622138 *
1463
- * Search for the kernel VM area starting at @addr, and return it.
1464
- * It is up to the caller to do all required locking to keep the returned
1465
- * pointer valid.
2139
+ * Search for the kernel VM area starting at @addr, and return it.
2140
+ * It is up to the caller to do all required locking to keep the returned
2141
+ * pointer valid.
2142
+ *
2143
+ * Return: the area descriptor on success or %NULL on failure.
14662144 */
14672145 struct vm_struct *find_vm_area(const void *addr)
14682146 {
14692147 struct vmap_area *va;
14702148
14712149 va = find_vmap_area((unsigned long)addr);
1472
- if (va && va->flags & VM_VM_AREA)
1473
- return va->vm;
2150
+ if (!va)
2151
+ return NULL;
14742152
1475
- return NULL;
2153
+ return va->vm;
14762154 }
14772155
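A small sketch of how a caller might use find_vm_area() to look up the vm_struct behind a vmalloc'ed pointer (illustrative only; the demo_* helper is hypothetical and, per the comment above, the caller must keep the area alive itself).

#include <linux/vmalloc.h>

/* Illustrative only: report the size of the vm area starting at @addr. */
static unsigned long demo_area_size(const void *addr)
{
	struct vm_struct *area = find_vm_area(addr);

	/* NULL means @addr is not the start of a live vmalloc area */
	return area ? area->size : 0;
}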
14782156 /**
1479
- * remove_vm_area - find and remove a continuous kernel virtual area
1480
- * @addr: base address
2157
+ * remove_vm_area - find and remove a continuous kernel virtual area
2158
+ * @addr: base address
14812159 *
1482
- * Search for the kernel VM area starting at @addr, and remove it.
1483
- * This function returns the found VM area, but using it is NOT safe
1484
- * on SMP machines, except for its size or flags.
2160
+ * Search for the kernel VM area starting at @addr, and remove it.
2161
+ * This function returns the found VM area, but using it is NOT safe
2162
+ * on SMP machines, except for its size or flags.
2163
+ *
2164
+ * Return: the area descriptor on success or %NULL on failure.
14852165 */
14862166 struct vm_struct *remove_vm_area(const void *addr)
14872167 {
....@@ -1489,14 +2169,13 @@
14892169
14902170 might_sleep();
14912171
1492
- va = find_vmap_area((unsigned long)addr);
1493
- if (va && va->flags & VM_VM_AREA) {
2172
+ spin_lock(&vmap_area_lock);
2173
+ va = __find_vmap_area((unsigned long)addr);
2174
+ if (va && va->vm) {
14942175 struct vm_struct *vm = va->vm;
14952176
1496
- spin_lock(&vmap_area_lock);
2177
+ trace_android_vh_remove_vmalloc_stack(vm);
14972178 va->vm = NULL;
1498
- va->flags &= ~VM_VM_AREA;
1499
- va->flags |= VM_LAZY_FREE;
15002179 spin_unlock(&vmap_area_lock);
15012180
15022181 kasan_free_shadow(vm);
....@@ -1504,7 +2183,66 @@
15042183
15052184 return vm;
15062185 }
2186
+
2187
+ spin_unlock(&vmap_area_lock);
15072188 return NULL;
2189
+}
2190
+
2191
+static inline void set_area_direct_map(const struct vm_struct *area,
2192
+ int (*set_direct_map)(struct page *page))
2193
+{
2194
+ int i;
2195
+
2196
+ for (i = 0; i < area->nr_pages; i++)
2197
+ if (page_address(area->pages[i]))
2198
+ set_direct_map(area->pages[i]);
2199
+}
2200
+
2201
+/* Handle removing and resetting vm mappings related to the vm_struct. */
2202
+static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
2203
+{
2204
+ unsigned long start = ULONG_MAX, end = 0;
2205
+ int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
2206
+ int flush_dmap = 0;
2207
+ int i;
2208
+
2209
+ remove_vm_area(area->addr);
2210
+
2211
+ /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
2212
+ if (!flush_reset)
2213
+ return;
2214
+
2215
+ /*
2216
+ * If not deallocating pages, just do the flush of the VM area and
2217
+ * return.
2218
+ */
2219
+ if (!deallocate_pages) {
2220
+ vm_unmap_aliases();
2221
+ return;
2222
+ }
2223
+
2224
+ /*
2225
+ * If execution gets here, flush the vm mapping and reset the direct
2226
+ * map. Find the start and end range of the direct mappings to make sure
2227
+ * the vm_unmap_aliases() flush includes the direct map.
2228
+ */
2229
+ for (i = 0; i < area->nr_pages; i++) {
2230
+ unsigned long addr = (unsigned long)page_address(area->pages[i]);
2231
+ if (addr) {
2232
+ start = min(addr, start);
2233
+ end = max(addr + PAGE_SIZE, end);
2234
+ flush_dmap = 1;
2235
+ }
2236
+ }
2237
+
2238
+ /*
2239
+ * Set direct map to something invalid so that it won't be cached if
2240
+ * there are any accesses after the TLB flush, then flush the TLB and
2241
+ * reset the direct map permissions to the default.
2242
+ */
2243
+ set_area_direct_map(area, set_direct_map_invalid_noflush);
2244
+ _vm_unmap_aliases(start, end, flush_dmap);
2245
+ set_area_direct_map(area, set_direct_map_default_noflush);
15082246 }
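A hedged sketch of the caller side this path serves: code that changes page permissions via the set_memory_*() helpers tags its allocation with VM_FLUSH_RESET_PERMS so that freeing resets and flushes the direct map as shown above. The demo_* name is hypothetical and PAGE_KERNEL_EXEC availability is architecture dependent.

#include <linux/vmalloc.h>
#include <linux/set_memory.h>

/* Illustrative only: executable allocation whose permissions vfree() resets. */
static void *demo_alloc_ro_exec(unsigned long size)
{
	unsigned long npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	void *p = __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
				       GFP_KERNEL, PAGE_KERNEL_EXEC,
				       VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
				       __builtin_return_address(0));

	if (!p)
		return NULL;

	/* change the permissions; vfree() undoes the direct-map changes */
	set_memory_ro((unsigned long)p, npages);
	set_memory_x((unsigned long)p, npages);
	return p;
}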
15092247
15102248 static void __vunmap(const void *addr, int deallocate_pages)
....@@ -1528,7 +2266,10 @@
15282266 debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
15292267 debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
15302268
1531
- remove_vm_area(addr);
2269
+ kasan_poison_vmalloc(area->addr, get_vm_area_size(area));
2270
+
2271
+ vm_remove_mappings(area, deallocate_pages);
2272
+
15322273 if (deallocate_pages) {
15332274 int i;
15342275
....@@ -1553,7 +2294,7 @@
15532294 * Use raw_cpu_ptr() because this can be called from preemptible
15542295 * context. Preemption is absolutely fine here, because the llist_add()
15552296 * implementation is lockless, so it works even if we are adding to
1556
- * nother cpu's list. schedule_work() should be fine with this too.
2297
+ * another cpu's list. schedule_work() should be fine with this too.
15572298 */
15582299 struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
15592300
....@@ -1562,11 +2303,11 @@
15622303 }
15632304
15642305 /**
1565
- * vfree_atomic - release memory allocated by vmalloc()
1566
- * @addr: memory base address
2306
+ * vfree_atomic - release memory allocated by vmalloc()
2307
+ * @addr: memory base address
15672308 *
1568
- * This one is just like vfree() but can be called in any atomic context
1569
- * except NMIs.
2309
+ * This one is just like vfree() but can be called in any atomic context
2310
+ * except NMIs.
15702311 */
15712312 void vfree_atomic(const void *addr)
15722313 {
....@@ -1579,19 +2320,30 @@
15792320 __vfree_deferred(addr);
15802321 }
15812322
2323
+static void __vfree(const void *addr)
2324
+{
2325
+ if (unlikely(in_interrupt()))
2326
+ __vfree_deferred(addr);
2327
+ else
2328
+ __vunmap(addr, 1);
2329
+}
2330
+
15822331 /**
1583
- * vfree - release memory allocated by vmalloc()
1584
- * @addr: memory base address
2332
+ * vfree - Release memory allocated by vmalloc()
2333
+ * @addr: Memory base address
15852334 *
1586
- * Free the virtually continuous memory area starting at @addr, as
1587
- * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
1588
- * NULL, no operation is performed.
2335
+ * Free the virtually continuous memory area starting at @addr, as obtained
2336
+ * from one of the vmalloc() family of APIs. This will usually also free the
2337
+ * physical memory underlying the virtual allocation, but that memory is
2338
+ * reference counted, so it will not be freed until the last user goes away.
15892339 *
1590
- * Must not be called in NMI context (strictly speaking, only if we don't
1591
- * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
1592
- * conventions for vfree() arch-depenedent would be a really bad idea)
2340
+ * If @addr is NULL, no operation is performed.
15932341 *
1594
- * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node)
2342
+ * Context:
2343
+ * May sleep if called *not* from interrupt context.
2344
+ * Must not be called in NMI context (strictly speaking, it could be
2345
+ * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
2346
+ * conventions for vfree() arch-dependent would be a really bad idea).
15952347 */
15962348 void vfree(const void *addr)
15972349 {
....@@ -1599,23 +2351,23 @@
15992351
16002352 kmemleak_free(addr);
16012353
2354
+ might_sleep_if(!in_interrupt());
2355
+
16022356 if (!addr)
16032357 return;
1604
- if (unlikely(in_interrupt()))
1605
- __vfree_deferred(addr);
1606
- else
1607
- __vunmap(addr, 1);
2358
+
2359
+ __vfree(addr);
16082360 }
16092361 EXPORT_SYMBOL(vfree);
16102362
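A minimal allocate/use/free sketch (illustrative only). vfree() may sleep when called from process context, so code that might run in atomic (but never NMI) context would use vfree_atomic() instead.

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

/* Illustrative only: the usual vmalloc()/vfree() pairing. */
static int demo_use_buffer(size_t size)
{
	char *buf = vmalloc(size);

	if (!buf)
		return -ENOMEM;

	memset(buf, 0, size);	/* the area is virtually contiguous */
	vfree(buf);		/* also releases the underlying pages */
	return 0;
}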
16112363 /**
1612
- * vunmap - release virtual mapping obtained by vmap()
1613
- * @addr: memory base address
2364
+ * vunmap - release virtual mapping obtained by vmap()
2365
+ * @addr: memory base address
16142366 *
1615
- * Free the virtually contiguous memory area starting at @addr,
1616
- * which was created from the page array passed to vmap().
2367
+ * Free the virtually contiguous memory area starting at @addr,
2368
+ * which was created from the page array passed to vmap().
16172369 *
1618
- * Must not be called in interrupt context.
2370
+ * Must not be called in interrupt context.
16192371 */
16202372 void vunmap(const void *addr)
16212373 {
....@@ -1627,24 +2379,29 @@
16272379 EXPORT_SYMBOL(vunmap);
16282380
16292381 /**
1630
- * vmap - map an array of pages into virtually contiguous space
1631
- * @pages: array of page pointers
1632
- * @count: number of pages to map
1633
- * @flags: vm_area->flags
1634
- * @prot: page protection for the mapping
2382
+ * vmap - map an array of pages into virtually contiguous space
2383
+ * @pages: array of page pointers
2384
+ * @count: number of pages to map
2385
+ * @flags: vm_area->flags
2386
+ * @prot: page protection for the mapping
16352387 *
1636
- * Maps @count pages from @pages into contiguous kernel virtual
1637
- * space.
2388
+ * Maps @count pages from @pages into contiguous kernel virtual space.
2389
+ * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself
2390
+ * (which must be kmalloc or vmalloc memory) and one reference per page in it
2391
+ * are transferred from the caller to vmap(), and will be freed / dropped when
2392
+ * vfree() is called on the return value.
2393
+ *
2394
+ * Return: the address of the area or %NULL on failure
16382395 */
16392396 void *vmap(struct page **pages, unsigned int count,
1640
- unsigned long flags, pgprot_t prot)
2397
+ unsigned long flags, pgprot_t prot)
16412398 {
16422399 struct vm_struct *area;
16432400 unsigned long size; /* In bytes */
16442401
16452402 might_sleep();
16462403
1647
- if (count > totalram_pages)
2404
+ if (count > totalram_pages())
16482405 return NULL;
16492406
16502407 size = (unsigned long)count << PAGE_SHIFT;
....@@ -1652,36 +2409,85 @@
16522409 if (!area)
16532410 return NULL;
16542411
1655
- if (map_vm_area(area, prot, pages)) {
2412
+ if (map_kernel_range((unsigned long)area->addr, size, pgprot_nx(prot),
2413
+ pages) < 0) {
16562414 vunmap(area->addr);
16572415 return NULL;
16582416 }
16592417
2418
+ if (flags & VM_MAP_PUT_PAGES) {
2419
+ area->pages = pages;
2420
+ area->nr_pages = count;
2421
+ }
16602422 return area->addr;
16612423 }
16622424 EXPORT_SYMBOL(vmap);
16632425
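A sketch of the classic vmap() pattern with the caller keeping ownership of the page array; with VM_MAP_PUT_PAGES the array and the page references would instead be handed to vmap() and dropped by vfree(). Illustrative only, demo_vmap() is hypothetical.

#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Illustrative only: allocate @nr pages and make them virtually contiguous. */
static void *demo_vmap(unsigned int nr, struct page ***ret_pages)
{
	struct page **pages;
	void *addr;
	unsigned int i;

	pages = kcalloc(nr, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;

	for (i = 0; i < nr; i++) {
		pages[i] = alloc_page(GFP_KERNEL);
		if (!pages[i])
			goto err;
	}

	addr = vmap(pages, nr, VM_MAP, PAGE_KERNEL);
	if (!addr)
		goto err;

	*ret_pages = pages;
	return addr;	/* undo with vunmap(), __free_page() each, kfree(pages) */
err:
	while (i--)
		__free_page(pages[i]);
	kfree(pages);
	return NULL;
}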
1664
-static void *__vmalloc_node(unsigned long size, unsigned long align,
1665
- gfp_t gfp_mask, pgprot_t prot,
1666
- int node, const void *caller);
2426
+#ifdef CONFIG_VMAP_PFN
2427
+struct vmap_pfn_data {
2428
+ unsigned long *pfns;
2429
+ pgprot_t prot;
2430
+ unsigned int idx;
2431
+};
2432
+
2433
+static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
2434
+{
2435
+ struct vmap_pfn_data *data = private;
2436
+
2437
+ if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
2438
+ return -EINVAL;
2439
+ *pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));
2440
+ return 0;
2441
+}
2442
+
2443
+/**
2444
+ * vmap_pfn - map an array of PFNs into virtually contiguous space
2445
+ * @pfns: array of PFNs
2446
+ * @count: number of pages to map
2447
+ * @prot: page protection for the mapping
2448
+ *
2449
+ * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns
2450
+ * the start address of the mapping.
2451
+ */
2452
+void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
2453
+{
2454
+ struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
2455
+ struct vm_struct *area;
2456
+
2457
+ area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP,
2458
+ __builtin_return_address(0));
2459
+ if (!area)
2460
+ return NULL;
2461
+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2462
+ count * PAGE_SIZE, vmap_pfn_apply, &data)) {
2463
+ free_vm_area(area);
2464
+ return NULL;
2465
+ }
2466
+
2467
+ flush_cache_vmap((unsigned long)area->addr,
2468
+ (unsigned long)area->addr + count * PAGE_SIZE);
2469
+
2470
+ return area->addr;
2471
+}
2472
+EXPORT_SYMBOL_GPL(vmap_pfn);
2473
+#endif /* CONFIG_VMAP_PFN */
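A hedged sketch of a vmap_pfn() user, e.g. a driver holding raw PFNs of device memory (no struct page) that wants a single contiguous kernel mapping. The protection choice and the demo_* name are assumptions; the resulting mapping is typically released with vunmap().

#ifdef CONFIG_VMAP_PFN
#include <linux/vmalloc.h>

/* Illustrative only: map non-struct-page PFNs (pfn_valid() PFNs are rejected). */
static void *demo_map_device_pfns(unsigned long *pfns, unsigned int count)
{
	/* uncached protection is common for device memory; adjust as needed */
	return vmap_pfn(pfns, count, pgprot_noncached(PAGE_KERNEL));
}
#endif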
2474
+
16672475 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
16682476 pgprot_t prot, int node)
16692477 {
1670
- struct page **pages;
1671
- unsigned int nr_pages, array_size, i;
16722478 const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1673
- const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
1674
- const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
1675
- 0 :
1676
- __GFP_HIGHMEM;
2479
+ unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
2480
+ unsigned int array_size = nr_pages * sizeof(struct page *), i;
2481
+ struct page **pages;
16772482
1678
- nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
1679
- array_size = (nr_pages * sizeof(struct page *));
2483
+ gfp_mask |= __GFP_NOWARN;
2484
+ if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
2485
+ gfp_mask |= __GFP_HIGHMEM;
16802486
16812487 /* Please note that the recursion is strictly bounded. */
16822488 if (array_size > PAGE_SIZE) {
1683
- pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
1684
- PAGE_KERNEL, node, area->caller);
2489
+ pages = __vmalloc_node(array_size, 1, nested_gfp, node,
2490
+ area->caller);
16852491 } else {
16862492 pages = kmalloc_node(array_size, nested_gfp, node);
16872493 }
....@@ -1699,49 +2505,53 @@
16992505 struct page *page;
17002506
17012507 if (node == NUMA_NO_NODE)
1702
- page = alloc_page(alloc_mask|highmem_mask);
2508
+ page = alloc_page(gfp_mask);
17032509 else
1704
- page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
2510
+ page = alloc_pages_node(node, gfp_mask, 0);
17052511
17062512 if (unlikely(!page)) {
1707
- /* Successfully allocated i pages, free them in __vunmap() */
2513
+ /* Successfully allocated i pages, free them in __vfree() */
17082514 area->nr_pages = i;
17092515 atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
17102516 goto fail;
17112517 }
17122518 area->pages[i] = page;
1713
- if (gfpflags_allow_blocking(gfp_mask|highmem_mask))
2519
+ if (gfpflags_allow_blocking(gfp_mask))
17142520 cond_resched();
17152521 }
17162522 atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
17172523
1718
- if (map_vm_area(area, prot, pages))
2524
+ if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
2525
+ prot, pages) < 0)
17192526 goto fail;
2527
+
17202528 return area->addr;
17212529
17222530 fail:
17232531 warn_alloc(gfp_mask, NULL,
17242532 "vmalloc: allocation failure, allocated %ld of %ld bytes",
17252533 (area->nr_pages*PAGE_SIZE), area->size);
1726
- vfree(area->addr);
2534
+ __vfree(area->addr);
17272535 return NULL;
17282536 }
17292537
17302538 /**
1731
- * __vmalloc_node_range - allocate virtually contiguous memory
1732
- * @size: allocation size
1733
- * @align: desired alignment
1734
- * @start: vm area range start
1735
- * @end: vm area range end
1736
- * @gfp_mask: flags for the page level allocator
1737
- * @prot: protection mask for the allocated pages
1738
- * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD)
1739
- * @node: node to use for allocation or NUMA_NO_NODE
1740
- * @caller: caller's return address
2539
+ * __vmalloc_node_range - allocate virtually contiguous memory
2540
+ * @size: allocation size
2541
+ * @align: desired alignment
2542
+ * @start: vm area range start
2543
+ * @end: vm area range end
2544
+ * @gfp_mask: flags for the page level allocator
2545
+ * @prot: protection mask for the allocated pages
2546
+ * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD)
2547
+ * @node: node to use for allocation or NUMA_NO_NODE
2548
+ * @caller: caller's return address
17412549 *
1742
- * Allocate enough pages to cover @size from the page level
1743
- * allocator with @gfp_mask flags. Map them into contiguous
1744
- * kernel virtual space, using a pagetable protection of @prot.
2550
+ * Allocate enough pages to cover @size from the page level
2551
+ * allocator with @gfp_mask flags. Map them into contiguous
2552
+ * kernel virtual space, using a pagetable protection of @prot.
2553
+ *
2554
+ * Return: the address of the area or %NULL on failure
17452555 */
17462556 void *__vmalloc_node_range(unsigned long size, unsigned long align,
17472557 unsigned long start, unsigned long end, gfp_t gfp_mask,
....@@ -1753,10 +2563,10 @@
17532563 unsigned long real_size = size;
17542564
17552565 size = PAGE_ALIGN(size);
1756
- if (!size || (size >> PAGE_SHIFT) > totalram_pages)
2566
+ if (!size || (size >> PAGE_SHIFT) > totalram_pages())
17572567 goto fail;
17582568
1759
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
2569
+ area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
17602570 vm_flags, start, end, node, gfp_mask, caller);
17612571 if (!area)
17622572 goto fail;
....@@ -1764,12 +2574,6 @@
17642574 addr = __vmalloc_area_node(area, gfp_mask, prot, node);
17652575 if (!addr)
17662576 return NULL;
1767
-
1768
- /*
1769
- * First make sure the mappings are removed from all page-tables
1770
- * before they are freed.
1771
- */
1772
- vmalloc_sync_unmappings();
17732577
17742578 /*
17752579 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
....@@ -1789,84 +2593,82 @@
17892593 }
17902594
17912595 /**
1792
- * __vmalloc_node - allocate virtually contiguous memory
1793
- * @size: allocation size
1794
- * @align: desired alignment
1795
- * @gfp_mask: flags for the page level allocator
1796
- * @prot: protection mask for the allocated pages
1797
- * @node: node to use for allocation or NUMA_NO_NODE
1798
- * @caller: caller's return address
2596
+ * __vmalloc_node - allocate virtually contiguous memory
2597
+ * @size: allocation size
2598
+ * @align: desired alignment
2599
+ * @gfp_mask: flags for the page level allocator
2600
+ * @node: node to use for allocation or NUMA_NO_NODE
2601
+ * @caller: caller's return address
17992602 *
1800
- * Allocate enough pages to cover @size from the page level
1801
- * allocator with @gfp_mask flags. Map them into contiguous
1802
- * kernel virtual space, using a pagetable protection of @prot.
2603
+ * Allocate enough pages to cover @size from the page level allocator with
2604
+ * @gfp_mask flags. Map them into contiguous kernel virtual space.
18032605 *
1804
- * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
1805
- * and __GFP_NOFAIL are not supported
2606
+ * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
2607
+ * and __GFP_NOFAIL are not supported
18062608 *
1807
- * Any use of gfp flags outside of GFP_KERNEL should be consulted
1808
- * with mm people.
2609
+ * Any use of gfp flags outside of GFP_KERNEL should be consulted
2610
+ * with mm people.
18092611 *
2612
+ * Return: pointer to the allocated memory or %NULL on error
18102613 */
1811
-static void *__vmalloc_node(unsigned long size, unsigned long align,
1812
- gfp_t gfp_mask, pgprot_t prot,
1813
- int node, const void *caller)
2614
+void *__vmalloc_node(unsigned long size, unsigned long align,
2615
+ gfp_t gfp_mask, int node, const void *caller)
18142616 {
18152617 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1816
- gfp_mask, prot, 0, node, caller);
2618
+ gfp_mask, PAGE_KERNEL, 0, node, caller);
18172619 }
2620
+/*
2621
+ * This is only for performance analysis of vmalloc and stress purpose.
2622
+ * It is required by vmalloc test module, therefore do not use it other
2623
+ * than that.
2624
+ */
2625
+#ifdef CONFIG_TEST_VMALLOC_MODULE
2626
+EXPORT_SYMBOL_GPL(__vmalloc_node);
2627
+#endif
18182628
1819
-void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
2629
+void *__vmalloc(unsigned long size, gfp_t gfp_mask)
18202630 {
1821
- return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE,
2631
+ return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
18222632 __builtin_return_address(0));
18232633 }
18242634 EXPORT_SYMBOL(__vmalloc);
18252635
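With the pgprot argument gone, __vmalloc() is now only about choosing the gfp mask (GFP_KERNEL | __GFP_ZERO would be what vzalloc() already gives you). A short, illustrative sketch with a hypothetical demo_* name:

#include <linux/vmalloc.h>

/* Illustrative only: __vmalloc() with an explicit gfp mask. */
static void *demo_alloc_quiet(unsigned long size)
{
	/* suppress the allocation-failure warning; the caller has a fallback */
	return __vmalloc(size, GFP_KERNEL | __GFP_NOWARN);
}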
1826
-static inline void *__vmalloc_node_flags(unsigned long size,
1827
- int node, gfp_t flags)
1828
-{
1829
- return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1830
- node, __builtin_return_address(0));
1831
-}
1832
-
1833
-
1834
-void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
1835
- void *caller)
1836
-{
1837
- return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
1838
-}
1839
-
18402636 /**
1841
- * vmalloc - allocate virtually contiguous memory
1842
- * @size: allocation size
1843
- * Allocate enough pages to cover @size from the page level
1844
- * allocator and map them into contiguous kernel virtual space.
2637
+ * vmalloc - allocate virtually contiguous memory
2638
+ * @size: allocation size
18452639 *
1846
- * For tight control over page level allocator and protection flags
1847
- * use __vmalloc() instead.
2640
+ * Allocate enough pages to cover @size from the page level
2641
+ * allocator and map them into contiguous kernel virtual space.
2642
+ *
2643
+ * For tight control over page level allocator and protection flags
2644
+ * use __vmalloc() instead.
2645
+ *
2646
+ * Return: pointer to the allocated memory or %NULL on error
18482647 */
18492648 void *vmalloc(unsigned long size)
18502649 {
1851
- return __vmalloc_node_flags(size, NUMA_NO_NODE,
1852
- GFP_KERNEL);
2650
+ return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
2651
+ __builtin_return_address(0));
18532652 }
18542653 EXPORT_SYMBOL(vmalloc);
18552654
18562655 /**
1857
- * vzalloc - allocate virtually contiguous memory with zero fill
1858
- * @size: allocation size
1859
- * Allocate enough pages to cover @size from the page level
1860
- * allocator and map them into contiguous kernel virtual space.
1861
- * The memory allocated is set to zero.
2656
+ * vzalloc - allocate virtually contiguous memory with zero fill
2657
+ * @size: allocation size
18622658 *
1863
- * For tight control over page level allocator and protection flags
1864
- * use __vmalloc() instead.
2659
+ * Allocate enough pages to cover @size from the page level
2660
+ * allocator and map them into contiguous kernel virtual space.
2661
+ * The memory allocated is set to zero.
2662
+ *
2663
+ * For tight control over page level allocator and protection flags
2664
+ * use __vmalloc() instead.
2665
+ *
2666
+ * Return: pointer to the allocated memory or %NULL on error
18652667 */
18662668 void *vzalloc(unsigned long size)
18672669 {
1868
- return __vmalloc_node_flags(size, NUMA_NO_NODE,
1869
- GFP_KERNEL | __GFP_ZERO);
2670
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
2671
+ __builtin_return_address(0));
18702672 }
18712673 EXPORT_SYMBOL(vzalloc);
18722674
....@@ -1876,39 +2678,35 @@
18762678 *
18772679 * The resulting memory area is zeroed so it can be mapped to userspace
18782680 * without leaking data.
2681
+ *
2682
+ * Return: pointer to the allocated memory or %NULL on error
18792683 */
18802684 void *vmalloc_user(unsigned long size)
18812685 {
1882
- struct vm_struct *area;
1883
- void *ret;
1884
-
1885
- ret = __vmalloc_node(size, SHMLBA,
1886
- GFP_KERNEL | __GFP_ZERO,
1887
- PAGE_KERNEL, NUMA_NO_NODE,
1888
- __builtin_return_address(0));
1889
- if (ret) {
1890
- area = find_vm_area(ret);
1891
- area->flags |= VM_USERMAP;
1892
- }
1893
- return ret;
2686
+ return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
2687
+ GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
2688
+ VM_USERMAP, NUMA_NO_NODE,
2689
+ __builtin_return_address(0));
18942690 }
18952691 EXPORT_SYMBOL(vmalloc_user);
18962692
18972693 /**
1898
- * vmalloc_node - allocate memory on a specific node
1899
- * @size: allocation size
1900
- * @node: numa node
2694
+ * vmalloc_node - allocate memory on a specific node
2695
+ * @size: allocation size
2696
+ * @node: numa node
19012697 *
1902
- * Allocate enough pages to cover @size from the page level
1903
- * allocator and map them into contiguous kernel virtual space.
2698
+ * Allocate enough pages to cover @size from the page level
2699
+ * allocator and map them into contiguous kernel virtual space.
19042700 *
1905
- * For tight control over page level allocator and protection flags
1906
- * use __vmalloc() instead.
2701
+ * For tight control over page level allocator and protection flags
2702
+ * use __vmalloc() instead.
2703
+ *
2704
+ * Return: pointer to the allocated memory or %NULL on error
19072705 */
19082706 void *vmalloc_node(unsigned long size, int node)
19092707 {
1910
- return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
1911
- node, __builtin_return_address(0));
2708
+ return __vmalloc_node(size, 1, GFP_KERNEL, node,
2709
+ __builtin_return_address(0));
19122710 }
19132711 EXPORT_SYMBOL(vmalloc_node);
19142712
....@@ -1921,33 +2719,14 @@
19212719 * allocator and map them into contiguous kernel virtual space.
19222720 * The memory allocated is set to zero.
19232721 *
1924
- * For tight control over page level allocator and protection flags
1925
- * use __vmalloc_node() instead.
2722
+ * Return: pointer to the allocated memory or %NULL on error
19262723 */
19272724 void *vzalloc_node(unsigned long size, int node)
19282725 {
1929
- return __vmalloc_node_flags(size, node,
1930
- GFP_KERNEL | __GFP_ZERO);
2726
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
2727
+ __builtin_return_address(0));
19312728 }
19322729 EXPORT_SYMBOL(vzalloc_node);
1933
-
1934
-/**
1935
- * vmalloc_exec - allocate virtually contiguous, executable memory
1936
- * @size: allocation size
1937
- *
1938
- * Kernel-internal function to allocate enough pages to cover @size
1939
- * the page level allocator and map them into contiguous and
1940
- * executable kernel virtual space.
1941
- *
1942
- * For tight control over page level allocator and protection flags
1943
- * use __vmalloc() instead.
1944
- */
1945
-
1946
-void *vmalloc_exec(unsigned long size)
1947
-{
1948
- return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC,
1949
- NUMA_NO_NODE, __builtin_return_address(0));
1950
-}
19512730
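With vmalloc_exec() removed, a caller that still needs an executable mapping open-codes the same thing through __vmalloc_node_range(); a hedged sketch, assuming PAGE_KERNEL_EXEC exists on the architecture (the VM_FLUSH_RESET_PERMS example earlier shows the fuller permission handling).

#include <linux/vmalloc.h>

/* Illustrative only: the open-coded equivalent of the removed helper. */
static void *demo_vmalloc_exec(unsigned long size)
{
	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
				    GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
				    NUMA_NO_NODE, __builtin_return_address(0));
}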
19522731 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
19532732 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
....@@ -1962,38 +2741,36 @@
19622741 #endif
19632742
19642743 /**
1965
- * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
1966
- * @size: allocation size
2744
+ * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
2745
+ * @size: allocation size
19672746 *
1968
- * Allocate enough 32bit PA addressable pages to cover @size from the
1969
- * page level allocator and map them into contiguous kernel virtual space.
2747
+ * Allocate enough 32bit PA addressable pages to cover @size from the
2748
+ * page level allocator and map them into contiguous kernel virtual space.
2749
+ *
2750
+ * Return: pointer to the allocated memory or %NULL on error
19702751 */
19712752 void *vmalloc_32(unsigned long size)
19722753 {
1973
- return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1974
- NUMA_NO_NODE, __builtin_return_address(0));
2754
+ return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
2755
+ __builtin_return_address(0));
19752756 }
19762757 EXPORT_SYMBOL(vmalloc_32);
19772758
19782759 /**
19792760 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
1980
- * @size: allocation size
2761
+ * @size: allocation size
19812762 *
19822763 * The resulting memory area is 32bit addressable and zeroed so it can be
19832764 * mapped to userspace without leaking data.
2765
+ *
2766
+ * Return: pointer to the allocated memory or %NULL on error
19842767 */
19852768 void *vmalloc_32_user(unsigned long size)
19862769 {
1987
- struct vm_struct *area;
1988
- void *ret;
1989
-
1990
- ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1991
- NUMA_NO_NODE, __builtin_return_address(0));
1992
- if (ret) {
1993
- area = find_vm_area(ret);
1994
- area->flags |= VM_USERMAP;
1995
- }
1996
- return ret;
2770
+ return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
2771
+ GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
2772
+ VM_USERMAP, NUMA_NO_NODE,
2773
+ __builtin_return_address(0));
19972774 }
19982775 EXPORT_SYMBOL(vmalloc_32_user);
19992776
....@@ -2079,31 +2856,29 @@
20792856 }
20802857
20812858 /**
2082
- * vread() - read vmalloc area in a safe way.
2083
- * @buf: buffer for reading data
2084
- * @addr: vm address.
2085
- * @count: number of bytes to be read.
2859
+ * vread() - read vmalloc area in a safe way.
2860
+ * @buf: buffer for reading data
2861
+ * @addr: vm address.
2862
+ * @count: number of bytes to be read.
20862863 *
2087
- * Returns # of bytes which addr and buf should be increased.
2088
- * (same number to @count). Returns 0 if [addr...addr+count) doesn't
2089
- * includes any intersect with alive vmalloc area.
2864
+ * This function checks that addr is a valid vmalloc'ed area, and
2865
+ * copy data from that area to a given buffer. If the given memory range
2866
+ * of [addr...addr+count) includes some valid address, data is copied to
2867
+ * proper area of @buf. If there are memory holes, they'll be zero-filled.
2868
+ * IOREMAP area is treated as memory hole and no copy is done.
20902869 *
2091
- * This function checks that addr is a valid vmalloc'ed area, and
2092
- * copy data from that area to a given buffer. If the given memory range
2093
- * of [addr...addr+count) includes some valid address, data is copied to
2094
- * proper area of @buf. If there are memory holes, they'll be zero-filled.
2095
- * IOREMAP area is treated as memory hole and no copy is done.
2870
+ * If [addr...addr+count) doesn't includes any intersects with alive
2871
+ * vm_struct area, returns 0. @buf should be kernel's buffer.
20962872 *
2097
- * If [addr...addr+count) doesn't includes any intersects with alive
2098
- * vm_struct area, returns 0. @buf should be kernel's buffer.
2873
+ * Note: In usual ops, vread() is never necessary because the caller
2874
+ * should know vmalloc() area is valid and can use memcpy().
2875
+ * This is for routines which have to access vmalloc area without
2876
+ * any information, as /dev/kmem.
20992877 *
2100
- * Note: In usual ops, vread() is never necessary because the caller
2101
- * should know vmalloc() area is valid and can use memcpy().
2102
- * This is for routines which have to access vmalloc area without
2103
- * any informaion, as /dev/kmem.
2104
- *
2878
+ * Return: number of bytes for which addr and buf should be increased
2879
+ * (same number as @count) or %0 if [addr...addr+count) doesn't
2880
+ * include any intersection with valid vmalloc area
21052881 */
2106
-
21072882 long vread(char *buf, char *addr, unsigned long count)
21082883 {
21092884 struct vmap_area *va;
....@@ -2121,7 +2896,7 @@
21212896 if (!count)
21222897 break;
21232898
2124
- if (!(va->flags & VM_VM_AREA))
2899
+ if (!va->vm)
21252900 continue;
21262901
21272902 vm = va->vm;
....@@ -2160,31 +2935,29 @@
21602935 }
21612936
21622937 /**
2163
- * vwrite() - write vmalloc area in a safe way.
2164
- * @buf: buffer for source data
2165
- * @addr: vm address.
2166
- * @count: number of bytes to be read.
2938
+ * vwrite() - write vmalloc area in a safe way.
2939
+ * @buf: buffer for source data
2940
+ * @addr: vm address.
2941
+ * @count: number of bytes to be read.
21672942 *
2168
- * Returns # of bytes which addr and buf should be incresed.
2169
- * (same number to @count).
2170
- * If [addr...addr+count) doesn't includes any intersect with valid
2171
- * vmalloc area, returns 0.
2943
+ * This function checks that addr is a valid vmalloc'ed area, and
2944
+ * copy data from a buffer to the given addr. If specified range of
2945
+ * [addr...addr+count) includes some valid address, data is copied from
2946
+ * proper area of @buf. If there are memory holes, no copy to hole.
2947
+ * IOREMAP area is treated as memory hole and no copy is done.
21722948 *
2173
- * This function checks that addr is a valid vmalloc'ed area, and
2174
- * copy data from a buffer to the given addr. If specified range of
2175
- * [addr...addr+count) includes some valid address, data is copied from
2176
- * proper area of @buf. If there are memory holes, no copy to hole.
2177
- * IOREMAP area is treated as memory hole and no copy is done.
2949
+ * If [addr...addr+count) doesn't include any intersection with an alive
2950
+ * vm_struct area, returns 0. @buf should be kernel's buffer.
21782951 *
2179
- * If [addr...addr+count) doesn't includes any intersects with alive
2180
- * vm_struct area, returns 0. @buf should be kernel's buffer.
2952
+ * Note: In usual ops, vwrite() is never necessary because the caller
2953
+ * should know vmalloc() area is valid and can use memcpy().
2954
+ * This is for routines which have to access vmalloc area without
2955
+ * any information, as /dev/kmem.
21812956 *
2182
- * Note: In usual ops, vwrite() is never necessary because the caller
2183
- * should know vmalloc() area is valid and can use memcpy().
2184
- * This is for routines which have to access vmalloc area without
2185
- * any informaion, as /dev/kmem.
2957
+ * Return: number of bytes for which addr and buf should be
2958
+ * increased (same number as @count) or %0 if [addr...addr+count)
2959
+ * doesn't include any intersection with valid vmalloc area
21862960 */
2187
-
21882961 long vwrite(char *buf, char *addr, unsigned long count)
21892962 {
21902963 struct vmap_area *va;
....@@ -2203,7 +2976,7 @@
22032976 if (!count)
22042977 break;
22052978
2206
- if (!(va->flags & VM_VM_AREA))
2979
+ if (!va->vm)
22072980 continue;
22082981
22092982 vm = va->vm;
....@@ -2236,21 +3009,21 @@
22363009 }
22373010
22383011 /**
2239
- * remap_vmalloc_range_partial - map vmalloc pages to userspace
2240
- * @vma: vma to cover
2241
- * @uaddr: target user address to start at
2242
- * @kaddr: virtual address of vmalloc kernel memory
2243
- * @pgoff: offset from @kaddr to start at
2244
- * @size: size of map area
3012
+ * remap_vmalloc_range_partial - map vmalloc pages to userspace
3013
+ * @vma: vma to cover
3014
+ * @uaddr: target user address to start at
3015
+ * @kaddr: virtual address of vmalloc kernel memory
3016
+ * @pgoff: offset from @kaddr to start at
3017
+ * @size: size of map area
22453018 *
2246
- * Returns: 0 for success, -Exxx on failure
3019
+ * Returns: 0 for success, -Exxx on failure
22473020 *
2248
- * This function checks that @kaddr is a valid vmalloc'ed area,
2249
- * and that it is big enough to cover the range starting at
2250
- * @uaddr in @vma. Will return failure if that criteria isn't
2251
- * met.
3021
+ * This function checks that @kaddr is a valid vmalloc'ed area,
3022
+ * and that it is big enough to cover the range starting at
3023
+ * @uaddr in @vma. Will return failure if that criterion isn't
3024
+ * met.
22523025 *
2253
- * Similar to remap_pfn_range() (see mm/memory.c)
3026
+ * Similar to remap_pfn_range() (see mm/memory.c)
22543027 */
22553028 int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
22563029 void *kaddr, unsigned long pgoff,
....@@ -2272,7 +3045,7 @@
22723045 if (!area)
22733046 return -EINVAL;
22743047
2275
- if (!(area->flags & VM_USERMAP))
3048
+ if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
22763049 return -EINVAL;
22773050
22783051 if (check_add_overflow(size, off, &end_index) ||
....@@ -2300,18 +3073,18 @@
23003073 EXPORT_SYMBOL(remap_vmalloc_range_partial);
23013074
23023075 /**
2303
- * remap_vmalloc_range - map vmalloc pages to userspace
2304
- * @vma: vma to cover (map full range of vma)
2305
- * @addr: vmalloc memory
2306
- * @pgoff: number of pages into addr before first page to map
3076
+ * remap_vmalloc_range - map vmalloc pages to userspace
3077
+ * @vma: vma to cover (map full range of vma)
3078
+ * @addr: vmalloc memory
3079
+ * @pgoff: number of pages into addr before first page to map
23073080 *
2308
- * Returns: 0 for success, -Exxx on failure
3081
+ * Returns: 0 for success, -Exxx on failure
23093082 *
2310
- * This function checks that addr is a valid vmalloc'ed area, and
2311
- * that it is big enough to cover the vma. Will return failure if
2312
- * that criteria isn't met.
3083
+ * This function checks that addr is a valid vmalloc'ed area, and
3084
+ * that it is big enough to cover the vma. Will return failure if
3085
+ * that criterion isn't met.
23133086 *
2314
- * Similar to remap_pfn_range() (see mm/memory.c)
3087
+ * Similar to remap_pfn_range() (see mm/memory.c)
23153088 */
23163089 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
23173090 unsigned long pgoff)
....@@ -2321,69 +3094,6 @@
23213094 vma->vm_end - vma->vm_start);
23223095 }
23233096 EXPORT_SYMBOL(remap_vmalloc_range);
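A sketch of the typical consumer: a character-device .mmap handler exposing a buffer that was allocated with vmalloc_user() (zeroed and tagged VM_USERMAP). Illustrative only; demo_buf and demo_mmap stand in for driver-private state.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *demo_buf;	/* allocated elsewhere with vmalloc_user() */

/* Illustrative only: map the whole vmalloc_user() buffer into the vma. */
static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* the vma must not be larger than the buffer; 0 = start of demo_buf */
	return remap_vmalloc_range(vma, demo_buf, 0);
}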
2324
-
2325
-/*
2326
- * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose
2327
- * not to have one.
2328
- *
2329
- * The purpose of this function is to make sure the vmalloc area
2330
- * mappings are identical in all page-tables in the system.
2331
- */
2332
-void __weak vmalloc_sync_mappings(void)
2333
-{
2334
-}
2335
-
2336
-void __weak vmalloc_sync_unmappings(void)
2337
-{
2338
-}
2339
-
2340
-static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2341
-{
2342
- pte_t ***p = data;
2343
-
2344
- if (p) {
2345
- *(*p) = pte;
2346
- (*p)++;
2347
- }
2348
- return 0;
2349
-}
2350
-
2351
-/**
2352
- * alloc_vm_area - allocate a range of kernel address space
2353
- * @size: size of the area
2354
- * @ptes: returns the PTEs for the address space
2355
- *
2356
- * Returns: NULL on failure, vm_struct on success
2357
- *
2358
- * This function reserves a range of kernel address space, and
2359
- * allocates pagetables to map that range. No actual mappings
2360
- * are created.
2361
- *
2362
- * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
2363
- * allocated for the VM area are returned.
2364
- */
2365
-struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2366
-{
2367
- struct vm_struct *area;
2368
-
2369
- area = get_vm_area_caller(size, VM_IOREMAP,
2370
- __builtin_return_address(0));
2371
- if (area == NULL)
2372
- return NULL;
2373
-
2374
- /*
2375
- * This ensures that page tables are constructed for this region
2376
- * of kernel virtual address space and mapped into init_mm.
2377
- */
2378
- if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2379
- size, f, ptes ? &ptes : NULL)) {
2380
- free_vm_area(area);
2381
- return NULL;
2382
- }
2383
-
2384
- return area;
2385
-}
2386
-EXPORT_SYMBOL_GPL(alloc_vm_area);
23873097
23883098 void free_vm_area(struct vm_struct *area)
23893099 {
....@@ -2401,81 +3111,64 @@
24013111 }
24023112
24033113 /**
2404
- * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
2405
- * @end: target address
2406
- * @pnext: out arg for the next vmap_area
2407
- * @pprev: out arg for the previous vmap_area
3114
+ * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
3115
+ * @addr: target address
24083116 *
2409
- * Returns: %true if either or both of next and prev are found,
2410
- * %false if no vmap_area exists
2411
- *
2412
- * Find vmap_areas end addresses of which enclose @end. ie. if not
2413
- * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
3117
+ * Returns: vmap_area if it is found. If there is no such area,
3118
+ * the first highest (reverse order) vmap_area is returned,
3119
+ * i.e. va->va_start < addr && va->va_end < addr, or NULL
3120
+ * if there are no areas before @addr.
24143121 */
2415
-static bool pvm_find_next_prev(unsigned long end,
2416
- struct vmap_area **pnext,
2417
- struct vmap_area **pprev)
3122
+static struct vmap_area *
3123
+pvm_find_va_enclose_addr(unsigned long addr)
24183124 {
2419
- struct rb_node *n = vmap_area_root.rb_node;
2420
- struct vmap_area *va = NULL;
3125
+ struct vmap_area *va, *tmp;
3126
+ struct rb_node *n;
3127
+
3128
+ n = free_vmap_area_root.rb_node;
3129
+ va = NULL;
24213130
24223131 while (n) {
2423
- va = rb_entry(n, struct vmap_area, rb_node);
2424
- if (end < va->va_end)
2425
- n = n->rb_left;
2426
- else if (end > va->va_end)
3132
+ tmp = rb_entry(n, struct vmap_area, rb_node);
3133
+ if (tmp->va_start <= addr) {
3134
+ va = tmp;
3135
+ if (tmp->va_end >= addr)
3136
+ break;
3137
+
24273138 n = n->rb_right;
2428
- else
2429
- break;
3139
+ } else {
3140
+ n = n->rb_left;
3141
+ }
24303142 }
24313143
2432
- if (!va)
2433
- return false;
2434
-
2435
- if (va->va_end > end) {
2436
- *pnext = va;
2437
- *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2438
- } else {
2439
- *pprev = va;
2440
- *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2441
- }
2442
- return true;
3144
+ return va;
24433145 }
24443146
24453147 /**
2446
- * pvm_determine_end - find the highest aligned address between two vmap_areas
2447
- * @pnext: in/out arg for the next vmap_area
2448
- * @pprev: in/out arg for the previous vmap_area
2449
- * @align: alignment
3148
+ * pvm_determine_end_from_reverse - find the highest aligned address
3149
+ * of a free block below VMALLOC_END
3150
+ * @va:
3151
+ * in - the VA we start the search from (reverse order);
3152
+ * out - the VA with the highest aligned end address.
24503153 *
2451
- * Returns: determined end address
2452
- *
2453
- * Find the highest aligned address between *@pnext and *@pprev below
2454
- * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned
2455
- * down address is between the end addresses of the two vmap_areas.
2456
- *
2457
- * Please note that the address returned by this function may fall
2458
- * inside *@pnext vmap_area. The caller is responsible for checking
2459
- * that.
3154
+ * Returns: determined end address within vmap_area
24603155 */
2461
-static unsigned long pvm_determine_end(struct vmap_area **pnext,
2462
- struct vmap_area **pprev,
2463
- unsigned long align)
3156
+static unsigned long
3157
+pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
24643158 {
2465
- const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
3159
+ unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
24663160 unsigned long addr;
24673161
2468
- if (*pnext)
2469
- addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2470
- else
2471
- addr = vmalloc_end;
2472
-
2473
- while (*pprev && (*pprev)->va_end > addr) {
2474
- *pnext = *pprev;
2475
- *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
3162
+ if (likely(*va)) {
3163
+ list_for_each_entry_from_reverse((*va),
3164
+ &free_vmap_area_list, list) {
3165
+ addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
3166
+ if ((*va)->va_start < addr)
3167
+ return addr;
3168
+ }
24763169 }
24773170
2478
- return addr;
3171
+ return 0;
24793172 }
24803173
24813174 /**
....@@ -2495,12 +3188,12 @@
24953188 * to gigabytes. To avoid interacting with regular vmallocs, these
24963189 * areas are allocated from top.
24973190 *
2498
- * Despite its complicated look, this allocator is rather simple. It
2499
- * does everything top-down and scans areas from the end looking for
2500
- * matching slot. While scanning, if any of the areas overlaps with
2501
- * existing vmap_area, the base address is pulled down to fit the
2502
- * area. Scanning is repeated till all the areas fit and then all
2503
- * necessary data structures are inserted and the result is returned.
3191
+ * Despite its complicated look, this allocator is rather simple. It
3192
+ * does everything top-down and scans free blocks from the end looking
3193
+ * for a matching base. While scanning, if any of the areas do not fit, the
3194
+ * base address is pulled down to fit the area. Scanning is repeated till
3195
+ * all the areas fit and then all necessary data structures are inserted
3196
+ * and the result is returned.
25043197 */
25053198 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
25063199 const size_t *sizes, int nr_vms,
....@@ -2508,11 +3201,12 @@
25083201 {
25093202 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
25103203 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2511
- struct vmap_area **vas, *prev, *next;
3204
+ struct vmap_area **vas, *va;
25123205 struct vm_struct **vms;
25133206 int area, area2, last_area, term_area;
2514
- unsigned long base, start, end, last_end;
3207
+ unsigned long base, start, size, end, last_end, orig_start, orig_end;
25153208 bool purged = false;
3209
+ enum fit_type type;
25163210
25173211 /* verify parameters and allocate data structures */
25183212 BUG_ON(offset_in_page(align) || !is_power_of_2(align));
....@@ -2548,62 +3242,52 @@
25483242 goto err_free2;
25493243
25503244 for (area = 0; area < nr_vms; area++) {
2551
- vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
3245
+ vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
25523246 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
25533247 if (!vas[area] || !vms[area])
25543248 goto err_free;
25553249 }
25563250 retry:
2557
- spin_lock(&vmap_area_lock);
3251
+ spin_lock(&free_vmap_area_lock);
25583252
25593253 /* start scanning - we scan from the top, begin with the last area */
25603254 area = term_area = last_area;
25613255 start = offsets[area];
25623256 end = start + sizes[area];
25633257
2564
- if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2565
- base = vmalloc_end - last_end;
2566
- goto found;
2567
- }
2568
- base = pvm_determine_end(&next, &prev, align) - end;
3258
+ va = pvm_find_va_enclose_addr(vmalloc_end);
3259
+ base = pvm_determine_end_from_reverse(&va, align) - end;
25693260
25703261 while (true) {
2571
- BUG_ON(next && next->va_end <= base + end);
2572
- BUG_ON(prev && prev->va_end > base + end);
2573
-
25743262 /*
25753263 * base might have underflowed, add last_end before
25763264 * comparing.
25773265 */
2578
- if (base + last_end < vmalloc_start + last_end) {
2579
- spin_unlock(&vmap_area_lock);
2580
- if (!purged) {
2581
- purge_vmap_area_lazy();
2582
- purged = true;
2583
- goto retry;
2584
- }
2585
- goto err_free;
2586
- }
3266
+ if (base + last_end < vmalloc_start + last_end)
3267
+ goto overflow;
25873268
25883269 /*
2589
- * If next overlaps, move base downwards so that it's
2590
- * right below next and then recheck.
3270
+ * Fitting base has not been found.
25913271 */
2592
- if (next && next->va_start < base + end) {
2593
- base = pvm_determine_end(&next, &prev, align) - end;
3272
+ if (va == NULL)
3273
+ goto overflow;
3274
+
3275
+ /*
3276
+ * If required width exceeds current VA block, move
3277
+ * base downwards and then recheck.
3278
+ */
3279
+ if (base + end > va->va_end) {
3280
+ base = pvm_determine_end_from_reverse(&va, align) - end;
25943281 term_area = area;
25953282 continue;
25963283 }
25973284
25983285 /*
2599
- * If prev overlaps, shift down next and prev and move
2600
- * base so that it's right below new next and then
2601
- * recheck.
3286
+ * If this VA does not fit, move base downwards and recheck.
26023287 */
2603
- if (prev && prev->va_end > base + start) {
2604
- next = prev;
2605
- prev = node_to_va(rb_prev(&next->rb_node));
2606
- base = pvm_determine_end(&next, &prev, align) - end;
3288
+ if (base + start < va->va_start) {
3289
+ va = node_to_va(rb_prev(&va->rb_node));
3290
+ base = pvm_determine_end_from_reverse(&va, align) - end;
26073291 term_area = area;
26083292 continue;
26093293 }
....@@ -2615,38 +3299,132 @@
26153299 area = (area + nr_vms - 1) % nr_vms;
26163300 if (area == term_area)
26173301 break;
3302
+
26183303 start = offsets[area];
26193304 end = start + sizes[area];
2620
- pvm_find_next_prev(base + end, &next, &prev);
3305
+ va = pvm_find_va_enclose_addr(base + end);
26213306 }
2622
-found:
3307
+
26233308 /* we've found a fitting base, insert all va's */
26243309 for (area = 0; area < nr_vms; area++) {
2625
- struct vmap_area *va = vas[area];
3310
+ int ret;
26263311
2627
- va->va_start = base + offsets[area];
2628
- va->va_end = va->va_start + sizes[area];
2629
- __insert_vmap_area(va);
3312
+ start = base + offsets[area];
3313
+ size = sizes[area];
3314
+
3315
+ va = pvm_find_va_enclose_addr(start);
3316
+ if (WARN_ON_ONCE(va == NULL))
3317
+ /* It is a BUG(), but trigger recovery instead. */
3318
+ goto recovery;
3319
+
3320
+ type = classify_va_fit_type(va, start, size);
3321
+ if (WARN_ON_ONCE(type == NOTHING_FIT))
3322
+ /* It is a BUG(), but trigger recovery instead. */
3323
+ goto recovery;
3324
+
3325
+ ret = adjust_va_to_fit_type(va, start, size, type);
3326
+ if (unlikely(ret))
3327
+ goto recovery;
3328
+
3329
+ /* Allocated area. */
3330
+ va = vas[area];
3331
+ va->va_start = start;
3332
+ va->va_end = start + size;
26303333 }
26313334
2632
- vmap_area_pcpu_hole = base + offsets[last_area];
3335
+ spin_unlock(&free_vmap_area_lock);
26333336
2634
- spin_unlock(&vmap_area_lock);
3337
+ /* populate the kasan shadow space */
3338
+ for (area = 0; area < nr_vms; area++) {
3339
+ if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
3340
+ goto err_free_shadow;
3341
+
3342
+ kasan_unpoison_vmalloc((void *)vas[area]->va_start,
3343
+ sizes[area]);
3344
+ }
26353345
26363346 /* insert all vm's */
2637
- for (area = 0; area < nr_vms; area++)
2638
- setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
3347
+ spin_lock(&vmap_area_lock);
3348
+ for (area = 0; area < nr_vms; area++) {
3349
+ insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);
3350
+
3351
+ setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
26393352 pcpu_get_vm_areas);
3353
+ }
3354
+ spin_unlock(&vmap_area_lock);
26403355
26413356 kfree(vas);
26423357 return vms;
26433358
3359
+recovery:
3360
+ /*
3361
+ * Remove previously allocated areas. There is no
3362
+ * need in removing these areas from the busy tree,
3363
+ * because they are inserted only on the final step
3364
+ * and when pcpu_get_vm_areas() is success.
3365
+ */
3366
+ while (area--) {
3367
+ orig_start = vas[area]->va_start;
3368
+ orig_end = vas[area]->va_end;
3369
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
3370
+ &free_vmap_area_list);
3371
+ if (va)
3372
+ kasan_release_vmalloc(orig_start, orig_end,
3373
+ va->va_start, va->va_end);
3374
+ vas[area] = NULL;
3375
+ }
3376
+
3377
+overflow:
3378
+ spin_unlock(&free_vmap_area_lock);
3379
+ if (!purged) {
3380
+ purge_vmap_area_lazy();
3381
+ purged = true;
3382
+
3383
+ /* Before "retry", check if we recover. */
3384
+ for (area = 0; area < nr_vms; area++) {
3385
+ if (vas[area])
3386
+ continue;
3387
+
3388
+ vas[area] = kmem_cache_zalloc(
3389
+ vmap_area_cachep, GFP_KERNEL);
3390
+ if (!vas[area])
3391
+ goto err_free;
3392
+ }
3393
+
3394
+ goto retry;
3395
+ }
3396
+
26443397 err_free:
26453398 for (area = 0; area < nr_vms; area++) {
2646
- kfree(vas[area]);
3399
+ if (vas[area])
3400
+ kmem_cache_free(vmap_area_cachep, vas[area]);
3401
+
26473402 kfree(vms[area]);
26483403 }
26493404 err_free2:
3405
+ kfree(vas);
3406
+ kfree(vms);
3407
+ return NULL;
3408
+
3409
+err_free_shadow:
3410
+ spin_lock(&free_vmap_area_lock);
3411
+ /*
3412
+ * We release all the vmalloc shadows, even the ones for regions that
3413
+ * hadn't been successfully added. This relies on kasan_release_vmalloc
3414
+ * being able to tolerate this case.
3415
+ */
3416
+ for (area = 0; area < nr_vms; area++) {
3417
+ orig_start = vas[area]->va_start;
3418
+ orig_end = vas[area]->va_end;
3419
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
3420
+ &free_vmap_area_list);
3421
+ if (va)
3422
+ kasan_release_vmalloc(orig_start, orig_end,
3423
+ va->va_start, va->va_end);
3424
+ vas[area] = NULL;
3425
+ kfree(vms[area]);
3426
+ }
3427
+ spin_unlock(&free_vmap_area_lock);
26503428 kfree(vas);
26513429 kfree(vms);
26523430 return NULL;
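In the insertion loop above, every start address is resolved to the free vmap area enclosing it, after which classify_va_fit_type() and adjust_va_to_fit_type() carve the request out of that free area; if either step fails, the recovery path hands any partially carved areas back to the free tree through merge_or_add_vmap_area(). The standalone user-space sketch below models only the classification step; the enum and function names are invented for illustration and are not the kernel's.

#include <stdio.h>

enum fit { NO_FIT, FULL_FIT, LEFT_FIT, RIGHT_FIT, SPLIT_FIT };

/*
 * How does the request [nva_start, nva_start + size) relate to the
 * free area [va_start, va_end)?
 */
static enum fit classify(unsigned long va_start, unsigned long va_end,
                         unsigned long nva_start, unsigned long size)
{
        unsigned long nva_end = nva_start + size;

        if (nva_start < va_start || nva_end > va_end)
                return NO_FIT;
        if (nva_start == va_start)
                return nva_end == va_end ? FULL_FIT : LEFT_FIT;
        if (nva_end == va_end)
                return RIGHT_FIT;

        /* Strictly inside: the free area must be split around the request. */
        return SPLIT_FIT;
}

int main(void)
{
        /* A 4 KiB request carved out of the middle of a 16 KiB free area. */
        printf("%d\n", classify(0x1000, 0x5000, 0x2000, 0x1000)); /* 4 == SPLIT_FIT */
        return 0;
}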
....@@ -2671,9 +3449,12 @@
26713449
26723450 #ifdef CONFIG_PROC_FS
26733451 static void *s_start(struct seq_file *m, loff_t *pos)
3452
+ __acquires(&vmap_purge_lock)
26743453 __acquires(&vmap_area_lock)
26753454 {
3455
+ mutex_lock(&vmap_purge_lock);
26763456 spin_lock(&vmap_area_lock);
3457
+
26773458 return seq_list_start(&vmap_area_list, *pos);
26783459 }
26793460
....@@ -2684,8 +3465,10 @@
26843465
26853466 static void s_stop(struct seq_file *m, void *p)
26863467 __releases(&vmap_area_lock)
3468
+ __releases(&vmap_purge_lock)
26873469 {
26883470 spin_unlock(&vmap_area_lock);
3471
+ mutex_unlock(&vmap_purge_lock);
26893472 }
26903473
26913474 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
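Reconstructed from the two hunks above as a sketch (the sparse __acquires/__releases annotations are omitted, so this is not a literal excerpt): the /proc/vmallocinfo iterator now takes vmap_purge_lock before vmap_area_lock in ->start() and releases the two in reverse order in ->stop(), keeping the purge list stable for the whole dump.

static void *s_start(struct seq_file *m, loff_t *pos)
{
        mutex_lock(&vmap_purge_lock);   /* outer lock, may sleep          */
        spin_lock(&vmap_area_lock);     /* inner lock, no sleeping inside */

        return seq_list_start(&vmap_area_list, *pos);
}

static void s_stop(struct seq_file *m, void *p)
{
        spin_unlock(&vmap_area_lock);   /* release in reverse order */
        mutex_unlock(&vmap_purge_lock);
}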
....@@ -2712,6 +3495,22 @@
27123495 }
27133496 }
27143497
3498
+static void show_purge_info(struct seq_file *m)
3499
+{
3500
+ struct llist_node *head;
3501
+ struct vmap_area *va;
3502
+
3503
+ head = READ_ONCE(vmap_purge_list.first);
3504
+ if (head == NULL)
3505
+ return;
3506
+
3507
+ llist_for_each_entry(va, head, purge_list) {
3508
+ seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
3509
+ (void *)va->va_start, (void *)va->va_end,
3510
+ va->va_end - va->va_start);
3511
+ }
3512
+}
3513
+
27153514 static int s_show(struct seq_file *m, void *p)
27163515 {
27173516 struct vmap_area *va;
....@@ -2720,14 +3519,13 @@
27203519 va = list_entry(p, struct vmap_area, list);
27213520
27223521 /*
2723
- * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
2724
- * behalf of vmap area is being tear down or vm_map_ram allocation.
3522
+ * s_show can encounter a race with remove_vm_area(); !vm means
3523
+ * the vmap area is being torn down or is a vm_map_ram allocation.
27253524 */
2726
- if (!(va->flags & VM_VM_AREA)) {
2727
- seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
3525
+ if (!va->vm) {
3526
+ seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
27283527 (void *)va->va_start, (void *)va->va_end,
2729
- va->va_end - va->va_start,
2730
- va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
3528
+ va->va_end - va->va_start);
27313529
27323530 return 0;
27333531 }
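With the rework above, any busy vmap area that has no struct vm_struct attached (either a vm_map_ram allocation or an area in the middle of teardown) is simply labelled vm_map_ram. An illustrative /proc/vmallocinfo line, with made-up address values and size:

0x00000000a1b2c3d4-0x00000000a1b4c3d4  131072 vm_map_ram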
....@@ -2758,11 +3556,25 @@
27583556 if (v->flags & VM_USERMAP)
27593557 seq_puts(m, " user");
27603558
3559
+ if (v->flags & VM_DMA_COHERENT)
3560
+ seq_puts(m, " dma-coherent");
3561
+
27613562 if (is_vmalloc_addr(v->pages))
27623563 seq_puts(m, " vpages");
27633564
27643565 show_numa_info(m, v);
3566
+ trace_android_vh_show_stack_hash(m, v);
27653567 seq_putc(m, '\n');
3568
+
3569
+ /*
3570
+ * As a final step, dump "unpurged" areas. Note that
3571
+ * the entire "/proc/vmallocinfo" output will not
3572
+ * be address sorted, because the purge list is not
3573
+ * sorted.
3574
+ */
3575
+ if (list_is_last(&va->list, &vmap_area_list))
3576
+ show_purge_info(m);
3577
+
27663578 return 0;
27673579 }
27683580
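Taken together with the previous hunk, the tail of /proc/vmallocinfo now carries the extra dma-coherent flag word for VM_DMA_COHERENT mappings and, after the last regular (address-sorted) entry, the unpurged ranges emitted by show_purge_info(), which are not necessarily in address order. A made-up excerpt; addresses, sizes and the caller offset are illustrative, and other flag words a real entry may carry are omitted:

0x00000000c3a1f000-0x00000000c3a30000   69632 dma_common_pages_remap+0x58/0x80 dma-coherent
0x00000000f0a20000-0x00000000f0a22000    8192 unpurged vm_area
0x00000000e8c40000-0x00000000e8c48000   32768 unpurged vm_area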
....@@ -2786,4 +3598,3 @@
27863598 module_init(proc_vmalloc_init);
27873599
27883600 #endif
2789
-