2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/mm/swap_state.c
@@ -21,8 +21,7 @@
 #include <linux/vmalloc.h>
 #include <linux/swap_slots.h>
 #include <linux/huge_mm.h>
-
-#include <asm/pgtable.h>
+#include <linux/shmem_fs.h>
 #include "internal.h"
 
 /*
@@ -59,8 +58,8 @@
 #define GET_SWAP_RA_VAL(vma)					\
 	(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)
 
-#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
-#define ADD_CACHE_INFO(x, nr)	do { swap_cache_info.x += (nr); } while (0)
+#define INC_CACHE_INFO(x)	data_race(swap_cache_info.x++)
+#define ADD_CACHE_INFO(x, nr)	data_race(swap_cache_info.x += (nr))
 
 static struct {
 	unsigned long add_total;
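
For context on the INC_CACHE_INFO()/ADD_CACHE_INFO() change above: data_race() (from <linux/compiler.h>) marks a plain access as an intentional, tolerated data race so KCSAN does not flag it; it adds no synchronization of its own. A minimal sketch of the pattern, with invented counter and helper names:

static unsigned long hits;		/* updated locklessly from many CPUs */

static inline void note_hit(void)
{
	data_race(hits++);		/* racy by design; stats may be approximate */
}

static inline unsigned long read_hits(void)
{
	return data_race(hits);		/* possibly stale snapshot is acceptable */
}
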
@@ -74,25 +73,27 @@
 	unsigned int i, j, nr;
 	unsigned long ret = 0;
 	struct address_space *spaces;
+	struct swap_info_struct *si;
 
-	rcu_read_lock();
 	for (i = 0; i < MAX_SWAPFILES; i++) {
-		/*
-		 * The corresponding entries in nr_swapper_spaces and
-		 * swapper_spaces will be reused only after at least
-		 * one grace period. So it is impossible for them
-		 * belongs to different usage.
-		 */
-		nr = nr_swapper_spaces[i];
-		spaces = rcu_dereference(swapper_spaces[i]);
-		if (!nr || !spaces)
+		swp_entry_t entry = swp_entry(i, 1);
+
+		/* Avoid get_swap_device() to warn for bad swap entry */
+		if (!swp_swap_info(entry))
 			continue;
+		/* Prevent swapoff to free swapper_spaces */
+		si = get_swap_device(entry);
+		if (!si)
+			continue;
+		nr = nr_swapper_spaces[i];
+		spaces = swapper_spaces[i];
 		for (j = 0; j < nr; j++)
 			ret += spaces[j].nrpages;
+		put_swap_device(si);
 	}
-	rcu_read_unlock();
 	return ret;
 }
+EXPORT_SYMBOL_GPL(total_swapcache_pages);
 
 static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
 
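
The rework of total_swapcache_pages() above replaces the RCU convention with a get_swap_device()/put_swap_device() reference, which pins the swap_info_struct so a racing swapoff cannot free swapper_spaces[] under the loop. The same pairing, reduced to a minimal sketch (the helper name peek_swap_cache() is invented; the calls are the real interfaces used throughout this patch):

static struct page *peek_swap_cache(swp_entry_t entry)
{
	struct swap_info_struct *si;
	struct page *page = NULL;

	si = get_swap_device(entry);		/* fails if swapoff already ran */
	if (si) {
		page = find_get_page(swap_address_space(entry),
				     swp_offset(entry));
		put_swap_device(si);		/* drop the pin once done */
	}
	return page;
}
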
@@ -107,15 +108,32 @@
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
+void *get_shadow_from_swap_cache(swp_entry_t entry)
+{
+	struct address_space *address_space = swap_address_space(entry);
+	pgoff_t idx = swp_offset(entry);
+	struct page *page;
+
+	page = find_get_entry(address_space, idx);
+	if (xa_is_value(page))
+		return page;
+	if (page)
+		put_page(page);
+	return NULL;
+}
+
 /*
- * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+int add_to_swap_cache(struct page *page, swp_entry_t entry,
+			gfp_t gfp, void **shadowp)
 {
-	int error, i, nr = hpage_nr_pages(page);
-	struct address_space *address_space;
+	struct address_space *address_space = swap_address_space(entry);
 	pgoff_t idx = swp_offset(entry);
+	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
+	unsigned long i, nr = thp_nr_pages(page);
+	void *old;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapCache(page), page);
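
get_shadow_from_swap_cache() above returns the XArray value entry (the workingset shadow) left behind when a page was reclaimed, or NULL. A hedged sketch of how a swap-in path can consume it; the wrapper name note_refault() is invented, while workingset_refault() is the real consumer (and __read_swap_cache_async() later in this patch does the same thing through the shadowp argument of add_to_swap_cache()):

static void note_refault(struct page *page, swp_entry_t entry)
{
	void *shadow = get_shadow_from_swap_cache(entry);

	if (shadow)			/* shadow is an xa_is_value() entry */
		workingset_refault(page, shadow);
}
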
@@ -124,75 +142,66 @@
 	page_ref_add(page, nr);
 	SetPageSwapCache(page);
 
-	address_space = swap_address_space(entry);
-	xa_lock_irq(&address_space->i_pages);
-	for (i = 0; i < nr; i++) {
-		set_page_private(page + i, entry.val + i);
-		error = radix_tree_insert(&address_space->i_pages,
-					  idx + i, page + i);
-		if (unlikely(error))
-			break;
-	}
-	if (likely(!error)) {
+	do {
+		unsigned long nr_shadows = 0;
+
+		xas_lock_irq(&xas);
+		xas_create_range(&xas);
+		if (xas_error(&xas))
+			goto unlock;
+		for (i = 0; i < nr; i++) {
+			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
+			old = xas_load(&xas);
+			if (xa_is_value(old)) {
+				nr_shadows++;
+				if (shadowp)
+					*shadowp = old;
+			}
+			set_page_private(page + i, entry.val + i);
+			xas_store(&xas, page);
+			xas_next(&xas);
+		}
+		address_space->nrexceptional -= nr_shadows;
 		address_space->nrpages += nr;
 		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
 		ADD_CACHE_INFO(add_total, nr);
-	} else {
-		/*
-		 * Only the context which have set SWAP_HAS_CACHE flag
-		 * would call add_to_swap_cache().
-		 * So add_to_swap_cache() doesn't returns -EEXIST.
-		 */
-		VM_BUG_ON(error == -EEXIST);
-		set_page_private(page + i, 0UL);
-		while (i--) {
-			radix_tree_delete(&address_space->i_pages, idx + i);
-			set_page_private(page + i, 0UL);
-		}
-		ClearPageSwapCache(page);
-		page_ref_sub(page, nr);
-	}
-	xa_unlock_irq(&address_space->i_pages);
+unlock:
+		xas_unlock_irq(&xas);
+	} while (xas_nomem(&xas, gfp));
 
-	return error;
-}
+	if (!xas_error(&xas))
+		return 0;
 
-
-int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
-{
-	int error;
-
-	error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page));
-	if (!error) {
-		error = __add_to_swap_cache(page, entry);
-		radix_tree_preload_end();
-	}
-	return error;
+	ClearPageSwapCache(page);
+	page_ref_sub(page, nr);
+	return xas_error(&xas);
 }
 
 /*
  * This must be called only on pages that have
  * been verified to be in the swap cache.
  */
-void __delete_from_swap_cache(struct page *page)
+void __delete_from_swap_cache(struct page *page,
+			swp_entry_t entry, void *shadow)
 {
-	struct address_space *address_space;
-	int i, nr = hpage_nr_pages(page);
-	swp_entry_t entry;
-	pgoff_t idx;
+	struct address_space *address_space = swap_address_space(entry);
+	int i, nr = thp_nr_pages(page);
+	pgoff_t idx = swp_offset(entry);
+	XA_STATE(xas, &address_space->i_pages, idx);
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(PageWriteback(page), page);
 
-	entry.val = page_private(page);
-	address_space = swap_address_space(entry);
-	idx = swp_offset(entry);
 	for (i = 0; i < nr; i++) {
-		radix_tree_delete(&address_space->i_pages, idx + i);
+		void *entry = xas_store(&xas, shadow);
+		VM_BUG_ON_PAGE(entry != page, entry);
 		set_page_private(page + i, 0);
+		xas_next(&xas);
 	}
 	ClearPageSwapCache(page);
+	if (shadow)
+		address_space->nrexceptional += nr;
 	address_space->nrpages -= nr;
 	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
 	ADD_CACHE_INFO(del_total, nr);
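
The do { ... } while (xas_nomem(...)) loop in add_to_swap_cache() above is the standard XArray pattern: attempt the store under the lock, and if it failed for lack of memory, let xas_nomem() allocate a node outside the lock and request a retry. A stripped-down sketch of the same pattern, using an invented array/index/item triple rather than anything from this patch:

static int store_item(struct xarray *xa, unsigned long index,
		      void *item, gfp_t gfp)
{
	XA_STATE(xas, xa, index);

	do {
		xas_lock_irq(&xas);
		xas_store(&xas, item);		/* may record -ENOMEM in xas */
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));		/* allocate outside the lock, retry */

	return xas_error(&xas);
}
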
@@ -218,7 +227,7 @@
 		return 0;
 
 	/*
-	 * Radix-tree node allocations from PF_MEMALLOC contexts could
+	 * XArray node allocations from PF_MEMALLOC contexts could
 	 * completely exhaust the page allocator.  __GFP_NOMEMALLOC
 	 * stops emergency reserves from being allocated.
 	 *
@@ -229,8 +238,7 @@
 	 * Add it to the swap cache.
 	 */
 	err = add_to_swap_cache(page, entry,
-			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
-	/* -ENOMEM radix-tree allocation failure */
+			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
 	if (err)
 		/*
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
@@ -239,7 +247,7 @@
 		goto fail;
 	/*
 	 * Normally the page will be dirtied in unmap because its pte should be
-	 * dirty. A special case is MADV_FREE page. The page'e pte could have
+	 * dirty. A special case is MADV_FREE page. The page's pte could have
 	 * dirty bit cleared but the page's SwapBacked bit is still set because
 	 * clearing the dirty bit and SwapBacked bit has no lock protected. For
 	 * such page, unmap will not set dirty bit for it, so page reclaim will
@@ -264,18 +272,46 @@
  */
 void delete_from_swap_cache(struct page *page)
 {
-	swp_entry_t entry;
-	struct address_space *address_space;
+	swp_entry_t entry = { .val = page_private(page) };
+	struct address_space *address_space = swap_address_space(entry);
 
-	entry.val = page_private(page);
-
-	address_space = swap_address_space(entry);
 	xa_lock_irq(&address_space->i_pages);
-	__delete_from_swap_cache(page);
+	__delete_from_swap_cache(page, entry, NULL);
 	xa_unlock_irq(&address_space->i_pages);
 
 	put_swap_page(page, entry);
-	page_ref_sub(page, hpage_nr_pages(page));
+	page_ref_sub(page, thp_nr_pages(page));
+}
+
+void clear_shadow_from_swap_cache(int type, unsigned long begin,
+				unsigned long end)
+{
+	unsigned long curr = begin;
+	void *old;
+
+	for (;;) {
+		unsigned long nr_shadows = 0;
+		swp_entry_t entry = swp_entry(type, curr);
+		struct address_space *address_space = swap_address_space(entry);
+		XA_STATE(xas, &address_space->i_pages, curr);
+
+		xa_lock_irq(&address_space->i_pages);
+		xas_for_each(&xas, old, end) {
+			if (!xa_is_value(old))
+				continue;
+			xas_store(&xas, NULL);
+			nr_shadows++;
+		}
+		address_space->nrexceptional -= nr_shadows;
+		xa_unlock_irq(&address_space->i_pages);
+
+		/* search the next swapcache until we meet end */
+		curr >>= SWAP_ADDRESS_SPACE_SHIFT;
+		curr++;
+		curr <<= SWAP_ADDRESS_SPACE_SHIFT;
+		if (curr > end)
+			break;
+	}
 }
 
 /*
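
In clear_shadow_from_swap_cache() above, the shift/increment/shift sequence advances curr to the first offset of the next per-swapfile address_space chunk (each chunk covers SWAP_ADDRESS_SPACE_PAGES = 1 << SWAP_ADDRESS_SPACE_SHIFT swap slots). The same step written as a rounding helper, purely for illustration (the helper name is invented):

static unsigned long next_swapcache_offset(unsigned long curr)
{
	/* Equivalent to: curr >>= SHIFT; curr++; curr <<= SHIFT; */
	return round_down(curr, SWAP_ADDRESS_SPACE_PAGES) +
	       SWAP_ADDRESS_SPACE_PAGES;
}
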
@@ -335,8 +371,13 @@
 			       unsigned long addr)
 {
 	struct page *page;
+	struct swap_info_struct *si;
 
+	si = get_swap_device(entry);
+	if (!si)
+		return NULL;
 	page = find_get_page(swap_address_space(entry), swp_offset(entry));
+	put_swap_device(si);
 
 	INC_CACHE_INFO(find_total);
 	if (page) {
@@ -375,24 +416,64 @@
 	return page;
 }
 
+/**
+ * find_get_incore_page - Find and get a page from the page or swap caches.
+ * @mapping: The address_space to search.
+ * @index: The page cache index.
+ *
+ * This differs from find_get_page() in that it will also look for the
+ * page in the swap cache.
+ *
+ * Return: The found page or %NULL.
+ */
+struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
+{
+	swp_entry_t swp;
+	struct swap_info_struct *si;
+	struct page *page = find_get_entry(mapping, index);
+
+	if (!page)
+		return page;
+	if (!xa_is_value(page))
+		return find_subpage(page, index);
+	if (!shmem_mapping(mapping))
+		return NULL;
+
+	swp = radix_to_swp_entry(page);
+	/* Prevent swapoff from happening to us */
+	si = get_swap_device(swp);
+	if (!si)
+		return NULL;
+	page = find_get_page(swap_address_space(swp), swp_offset(swp));
+	put_swap_device(si);
+	return page;
+}
+
 struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr,
 			bool *new_page_allocated)
 {
-	struct page *found_page, *new_page = NULL;
-	struct address_space *swapper_space = swap_address_space(entry);
-	int err;
+	struct swap_info_struct *si;
+	struct page *page;
+	void *shadow = NULL;
+
 	*new_page_allocated = false;
 
-	do {
+	for (;;) {
+		int err;
 		/*
 		 * First check the swap cache.  Since this is normally
 		 * called after lookup_swap_cache() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		found_page = find_get_page(swapper_space, swp_offset(entry));
-		if (found_page)
-			break;
+		si = get_swap_device(entry);
+		if (!si)
+			return NULL;
+		page = find_get_page(swap_address_space(entry),
+				     swp_offset(entry));
+		put_swap_device(si);
+		if (page)
+			return page;
 
 		/*
 		 * Just skip read ahead for unused swap slot.
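
find_get_incore_page() above reports a page as present whether it sits in the page cache or, for shmem, in the swap cache. A sketch of the kind of presence test a caller such as mincore() can build on top of it (the wrapper name page_is_incore() is invented):

static bool page_is_incore(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_get_incore_page(mapping, index);

	if (!page)
		return false;
	put_page(page);			/* drop the reference taken by the lookup */
	return true;
}
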
@@ -403,69 +484,69 @@
 		 * else swap_off will be aborted if we return NULL.
 		 */
 		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
-			break;
+			return NULL;
 
 		/*
-		 * Get a new page to read into from swap.
+		 * Get a new page to read into from swap.  Allocate it now,
+		 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
+		 * cause any racers to loop around until we add it to cache.
 		 */
-		if (!new_page) {
-			new_page = alloc_page_vma(gfp_mask, vma, addr);
-			if (!new_page)
-				break;		/* Out of memory */
-		}
-
-		/*
-		 * call radix_tree_preload() while we can wait.
-		 */
-		err = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK);
-		if (err)
-			break;
+		page = alloc_page_vma(gfp_mask, vma, addr);
+		if (!page)
+			return NULL;
 
 		/*
 		 * Swap entry may have been freed since our caller observed it.
 		 */
 		err = swapcache_prepare(entry);
-		if (err == -EEXIST) {
-			radix_tree_preload_end();
-			/*
-			 * We might race against get_swap_page() and stumble
-			 * across a SWAP_HAS_CACHE swap_map entry whose page
-			 * has not been brought into the swapcache yet.
-			 */
-			cond_resched();
-			continue;
-		}
-		if (err) {		/* swp entry is obsolete ? */
-			radix_tree_preload_end();
+		if (!err)
 			break;
-		}
 
-		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
-		__SetPageLocked(new_page);
-		__SetPageSwapBacked(new_page);
-		err = __add_to_swap_cache(new_page, entry);
-		if (likely(!err)) {
-			radix_tree_preload_end();
-			/*
-			 * Initiate read into locked page and return.
-			 */
-			SetPageWorkingset(new_page);
-			lru_cache_add_anon(new_page);
-			*new_page_allocated = true;
-			return new_page;
-		}
-		radix_tree_preload_end();
-		__ClearPageLocked(new_page);
+		put_page(page);
+		if (err != -EEXIST)
+			return NULL;
+
 		/*
-		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
-		 * clear SWAP_HAS_CACHE flag.
+		 * We might race against __delete_from_swap_cache(), and
+		 * stumble across a swap_map entry whose SWAP_HAS_CACHE
+		 * has not yet been cleared.  Or race against another
+		 * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
+		 * in swap_map, but not yet added its page to swap cache.
 		 */
-		put_swap_page(new_page, entry);
-	} while (err != -ENOMEM);
+		schedule_timeout_uninterruptible(1);
+	}
 
-	if (new_page)
-		put_page(new_page);
-	return found_page;
+	/*
+	 * The swap entry is ours to swap in. Prepare the new page.
+	 */
+
+	__SetPageLocked(page);
+	__SetPageSwapBacked(page);
+
+	/* May fail (-ENOMEM) if XArray node allocation failed. */
+	if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) {
+		put_swap_page(page, entry);
+		goto fail_unlock;
+	}
+
+	if (mem_cgroup_charge(page, NULL, gfp_mask)) {
+		delete_from_swap_cache(page);
+		goto fail_unlock;
+	}
+
+	if (shadow)
+		workingset_refault(page, shadow);
+
+	/* Caller will initiate read into locked page */
+	SetPageWorkingset(page);
+	lru_cache_add(page);
+	*new_page_allocated = true;
+	return page;
+
+fail_unlock:
+	unlock_page(page);
+	put_page(page);
+	return NULL;
 }
 
 /*
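
The restructured __read_swap_cache_async() above returns the page locked, with *new_page_allocated set only in the context that actually inserted it into the swap cache; that context is responsible for starting the read. A sketch of the usual caller pattern, mirroring read_swap_cache_async() elsewhere in this file (the wrapper name swapin_one_page() is invented):

static struct page *swapin_one_page(swp_entry_t entry, gfp_t gfp_mask,
				    struct vm_area_struct *vma,
				    unsigned long addr, bool do_poll)
{
	bool page_allocated;
	struct page *page = __read_swap_cache_async(entry, gfp_mask, vma,
						    addr, &page_allocated);

	if (page_allocated)
		swap_readpage(page, do_poll);	/* start I/O on the locked page */
	return page;
}
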
@@ -565,6 +646,10 @@
  * the readahead.
  *
  * Caller must hold down_read on the vma->vm_mm if vmf->vma is not NULL.
+ * This is needed to ensure the VMA will not be freed behind our back. In the case
+ * of the speculative page fault handler, this cannot happen, even if we don't
+ * hold the mmap_sem. Callees are assumed to take care of reading VMA's fields
+ * using READ_ONCE() to read consistent values.
  */
 struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 				struct vm_fault *vmf)
@@ -583,6 +668,13 @@
 	mask = swapin_nr_pages(offset) - 1;
 	if (!mask)
 		goto skip;
+
+	/* Test swap type to make sure the dereference is safe */
+	if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
+		struct inode *inode = si->swap_file->f_mapping->host;
+		if (inode_read_congested(inode))
+			goto skip;
+	}
 
 	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
@@ -628,27 +720,23 @@
 		return -ENOMEM;
 	for (i = 0; i < nr; i++) {
 		space = spaces + i;
-		INIT_RADIX_TREE(&space->i_pages, GFP_ATOMIC|__GFP_NOWARN);
+		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
 		atomic_set(&space->i_mmap_writable, 0);
 		space->a_ops = &swap_aops;
 		/* swap cache doesn't use writeback related tags */
 		mapping_set_no_writeback_tags(space);
 	}
 	nr_swapper_spaces[type] = nr;
-	rcu_assign_pointer(swapper_spaces[type], spaces);
+	swapper_spaces[type] = spaces;
 
 	return 0;
 }
 
 void exit_swap_address_space(unsigned int type)
 {
-	struct address_space *spaces;
-
-	spaces = swapper_spaces[type];
+	kvfree(swapper_spaces[type]);
 	nr_swapper_spaces[type] = 0;
-	rcu_assign_pointer(swapper_spaces[type], NULL);
-	synchronize_rcu();
-	kvfree(spaces);
+	swapper_spaces[type] = NULL;
 }
 
 static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
@@ -658,9 +746,9 @@
 				     unsigned long *start,
 				     unsigned long *end)
 {
-	*start = max3(lpfn, PFN_DOWN(vma->vm_start),
+	*start = max3(lpfn, PFN_DOWN(READ_ONCE(vma->vm_start)),
 		      PFN_DOWN(faddr & PMD_MASK));
-	*end = min3(rpfn, PFN_DOWN(vma->vm_end),
+	*end = min3(rpfn, PFN_DOWN(READ_ONCE(vma->vm_end)),
 		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
 }
 
@@ -732,6 +820,20 @@
 	pte_unmap(orig_pte);
 }
 
+/**
+ * swap_vma_readahead - swap in pages in hope we need them soon
+ * @fentry: swap entry of this memory
+ * @gfp_mask: memory allocation flags
+ * @vmf: fault information
+ *
+ * Returns the struct page for entry and addr, after queueing swapin.
+ *
+ * Primitive swap readahead code. We simply read in a few pages whose
+ * virtual addresses are around the fault address in the same vma.
+ *
+ * Caller must hold read mmap_lock if vmf->vma is not NULL.
+ *
+ */
 static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 				       struct vm_fault *vmf)
 {