2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/include/linux/mmzone.h
....@@ -18,6 +18,8 @@
1818 #include <linux/pageblock-flags.h>
1919 #include <linux/page-flags-layout.h>
2020 #include <linux/atomic.h>
21 +#include <linux/mm_types.h>
22 +#include <linux/page-flags.h>
2123 #include <linux/android_kabi.h>
2224 #include <asm/page.h>
2325
....@@ -36,6 +38,8 @@
3638 * will not.
3739 */
3840 #define PAGE_ALLOC_COSTLY_ORDER 3
41 +
42 +#define MAX_KSWAPD_THREADS 16
3943
4044 enum migratetype {
4145 MIGRATE_UNMOVABLE,
....@@ -66,7 +70,7 @@
6670 };
6771
6872 /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
69 -extern char * const migratetype_names[MIGRATE_TYPES];
73 +extern const char * const migratetype_names[MIGRATE_TYPES];
7074
7175 #ifdef CONFIG_CMA
7276 # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
....@@ -89,17 +93,27 @@
8993
9094 extern int page_group_by_mobility_disabled;
9195
92 -#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
93 -#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
96 +#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
9497
9598 #define get_pageblock_migratetype(page) \
96 - get_pfnblock_flags_mask(page, page_to_pfn(page), \
97 - PB_migrate_end, MIGRATETYPE_MASK)
99 + get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
98100
99101 struct free_area {
100102 struct list_head free_list[MIGRATE_TYPES];
101103 unsigned long nr_free;
102104 };
105 +
106 +static inline struct page *get_page_from_free_area(struct free_area *area,
107 + int migratetype)
108 +{
109 + return list_first_entry_or_null(&area->free_list[migratetype],
110 + struct page, lru);
111 +}
112 +
113 +static inline bool free_area_empty(struct free_area *area, int migratetype)
114 +{
115 + return list_empty(&area->free_list[migratetype]);
116 +}
103117
104118 struct pglist_data;
105119
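The two helpers added above wrap the common "peek at a free list" pattern used by the buddy allocator. A minimal sketch of a caller, assuming the zone and order are already known (peek_free_page() is a hypothetical name, not part of this patch; struct zone's existing free_area[] array is used):

/* Sketch only: first free page of a given migratetype at a given order. */
static struct page *peek_free_page(struct zone *zone, unsigned int order,
                                   int migratetype)
{
        struct free_area *area = &zone->free_area[order];

        if (free_area_empty(area, migratetype))
                return NULL;
        return get_page_from_free_area(area, migratetype);
}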
....@@ -144,15 +158,9 @@
144158 NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
145159 NR_MLOCK, /* mlock()ed pages found and moved off LRU */
146160 NR_PAGETABLE, /* used for pagetables */
147 - NR_KERNEL_STACK_KB, /* measured in KiB */
148 -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
149 - NR_KERNEL_SCS_BYTES, /* measured in bytes */
150 -#endif
151161 /* Second 128 byte cacheline */
152162 NR_BOUNCE,
153 -#if IS_ENABLED(CONFIG_ZSMALLOC)
154163 NR_ZSPAGES, /* allocated in zsmalloc */
155 -#endif
156164 NR_FREE_CMA_PAGES,
157165 NR_VM_ZONE_STAT_ITEMS };
158166
....@@ -163,13 +171,20 @@
163171 NR_INACTIVE_FILE, /* " " " " " */
164172 NR_ACTIVE_FILE, /* " " " " " */
165173 NR_UNEVICTABLE, /* " " " " " */
166 - NR_SLAB_RECLAIMABLE,
167 - NR_SLAB_UNRECLAIMABLE,
174 + NR_SLAB_RECLAIMABLE_B,
175 + NR_SLAB_UNRECLAIMABLE_B,
168176 NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
169177 NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
170 - WORKINGSET_REFAULT,
171 - WORKINGSET_ACTIVATE,
172 - WORKINGSET_RESTORE,
178 + WORKINGSET_NODES,
179 + WORKINGSET_REFAULT_BASE,
180 + WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE,
181 + WORKINGSET_REFAULT_FILE,
182 + WORKINGSET_ACTIVATE_BASE,
183 + WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE,
184 + WORKINGSET_ACTIVATE_FILE,
185 + WORKINGSET_RESTORE_BASE,
186 + WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE,
187 + WORKINGSET_RESTORE_FILE,
173188 WORKINGSET_NODERECLAIM,
174189 NR_ANON_MAPPED, /* Mapped anonymous pages */
175190 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
....@@ -181,19 +196,42 @@
181196 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
182197 NR_SHMEM_THPS,
183198 NR_SHMEM_PMDMAPPED,
199 + NR_FILE_THPS,
200 + NR_FILE_PMDMAPPED,
184201 NR_ANON_THPS,
185 - NR_UNSTABLE_NFS, /* NFS unstable pages */
186202 NR_VMSCAN_WRITE,
187203 NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
188204 NR_DIRTIED, /* page dirtyings since bootup */
189205 NR_WRITTEN, /* page writings since bootup */
190206 NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
191 - NR_UNRECLAIMABLE_PAGES,
192 - NR_ION_HEAP,
193 - NR_ION_HEAP_POOL,
194 - NR_GPU_HEAP,
207 + NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
208 + NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
209 + NR_KERNEL_STACK_KB, /* measured in KiB */
210 +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
211 + NR_KERNEL_SCS_KB, /* measured in KiB */
212 +#endif
195213 NR_VM_NODE_STAT_ITEMS
196214 };
215 +
216 +/*
217 + * Returns true if the value is measured in bytes (most vmstat values are
218 + * measured in pages). This defines the API part, the internal representation
219 + * might be different.
220 + */
221 +static __always_inline bool vmstat_item_in_bytes(int idx)
222 +{
223 + /*
224 + * Global and per-node slab counters track slab pages.
225 + * It's expected that changes are multiples of PAGE_SIZE.
226 + * Internally values are stored in pages.
227 + *
228 + * Per-memcg and per-lruvec counters track memory, consumed
229 + * by individual slab objects. These counters are actually
230 + * byte-precise.
231 + */
232 + return (idx == NR_SLAB_RECLAIMABLE_B ||
233 + idx == NR_SLAB_UNRECLAIMABLE_B);
234 +}
197235
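As the comment in vmstat_item_in_bytes() above notes, the *_B items are byte-based at the API level while still being stored as pages internally. A small sketch of how a stats reader might apply that rule (vmstat_report_value() is a hypothetical helper; only vmstat_item_in_bytes() comes from this patch):

/* Sketch: convert a counter stored in pages into the unit reported
 * for that item (bytes for the *_B items, pages otherwise). */
static unsigned long vmstat_report_value(enum node_stat_item idx,
                                         unsigned long pages)
{
        if (vmstat_item_in_bytes(idx))
                return pages << PAGE_SHIFT;
        return pages;
}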
198236 /*
199237 * We do arithmetic on the LRU lists in various places in the code,
....@@ -221,47 +259,45 @@
221259
222260 #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
223261
224 -static inline int is_file_lru(enum lru_list lru)
262 +static inline bool is_file_lru(enum lru_list lru)
225263 {
226264 return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
227265 }
228266
229 -static inline int is_active_lru(enum lru_list lru)
267 +static inline bool is_active_lru(enum lru_list lru)
230268 {
231269 return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
232270 }
233271
234 -struct zone_reclaim_stat {
235 - /*
236 - * The pageout code in vmscan.c keeps track of how many of the
237 - * mem/swap backed and file backed pages are referenced.
238 - * The higher the rotated/scanned ratio, the more valuable
239 - * that cache is.
240 - *
241 - * The anon LRU stats live in [0], file LRU stats in [1]
242 - */
243 - unsigned long recent_rotated[2];
244 - unsigned long recent_scanned[2];
272 +#define ANON_AND_FILE 2
273 +
274 +enum lruvec_flags {
275 + LRUVEC_CONGESTED, /* lruvec has many dirty pages
276 + * backed by a congested BDI
277 + */
245278 };
246279
247280 struct lruvec {
248281 struct list_head lists[NR_LRU_LISTS];
249 - struct zone_reclaim_stat reclaim_stat;
250 - /* Evictions & activations on the inactive file list */
251 - atomic_long_t inactive_age;
282 + /*
283 + * These track the cost of reclaiming one LRU - file or anon -
284 + * over the other. As the observed cost of reclaiming one LRU
285 + * increases, the reclaim scan balance tips toward the other.
286 + */
287 + unsigned long anon_cost;
288 + unsigned long file_cost;
289 + /* Non-resident age, driven by LRU movement */
290 + atomic_long_t nonresident_age;
252291 /* Refaults at the time of last reclaim cycle */
253 - unsigned long refaults;
292 + unsigned long refaults[ANON_AND_FILE];
293 + /* Various lruvec state flags (enum lruvec_flags) */
294 + unsigned long flags;
254295 #ifdef CONFIG_MEMCG
255296 struct pglist_data *pgdat;
256297 #endif
257298 };
258299
259 -/* Mask used at gathering information at once (see memcontrol.c) */
260 -#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
261 -#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
262 -#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
263 -
264 -/* Isolate unmapped file */
300 +/* Isolate unmapped pages */
265301 #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
266302 /* Isolate for asynchronous migration */
267303 #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
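For the anon_cost/file_cost fields added to struct lruvec above, the idea is that the LRU with the higher observed reclaim cost receives proportionally less scan pressure. A rough illustration of that proportionality only; the real balancing lives in mm/vmscan.c and also folds in swappiness and other factors:

/* Sketch: share of nr_to_scan given to the LRU whose own cost is
 * this_cost, when the other LRU's cost is other_cost. */
static unsigned long scan_share(unsigned long nr_to_scan,
                                unsigned long this_cost,
                                unsigned long other_cost)
{
        unsigned long total = this_cost + other_cost + 1;

        return nr_to_scan * (other_cost + 1) / total;
}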
....@@ -278,9 +314,10 @@
278314 NR_WMARK
279315 };
280316
281 -#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
282 -#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
283 -#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
317 +#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
318 +#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
319 +#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
320 +#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
284321
285322 struct per_cpu_pages {
286323 int count; /* number of pages in the list */
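With watermark_boost folded into the macros above, every watermark check automatically sees the temporarily raised value. A minimal sketch of such a check (zone_page_state() and NR_FREE_PAGES are existing kernel symbols; the wrapper itself is hypothetical and ignores lowmem reserves and allocation flags):

/* Sketch: is this zone currently above its (possibly boosted) low watermark? */
static bool zone_above_low_wmark(struct zone *z)
{
        return zone_page_state(z, NR_FREE_PAGES) > low_wmark_pages(z);
}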
....@@ -311,33 +348,20 @@
311348 #endif /* !__GENERATING_BOUNDS.H */
312349
313350 enum zone_type {
314 -#ifdef CONFIG_ZONE_DMA
315351 /*
316 - * ZONE_DMA is used when there are devices that are not able
317 - * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
318 - * carve out the portion of memory that is needed for these devices.
319 - * The range is arch specific.
320 - *
321 - * Some examples
322 - *
323 - * Architecture Limit
324 - * ---------------------------
325 - * parisc, ia64, sparc <4G
326 - * s390 <2G
327 - * arm Various
328 - * alpha Unlimited or 0-16MB.
329 - *
330 - * i386, x86_64 and multiple other arches
331 - * <16M.
352 + * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
353 + * to DMA to all of the addressable memory (ZONE_NORMAL).
354 + * On architectures where this area covers the whole 32 bit address
355 + * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller
356 + * DMA addressing constraints. This distinction is important as a 32bit
357 + * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
358 + * platforms may need both zones as they support peripherals with
359 + * different DMA addressing limitations.
332360 */
361 +#ifdef CONFIG_ZONE_DMA
333362 ZONE_DMA,
334363 #endif
335364 #ifdef CONFIG_ZONE_DMA32
336 - /*
337 - * x86_64 needs two ZONE_DMAs because it supports devices that are
338 - * only able to do DMA to the lower 16M but also 32 bit devices that
339 - * can only do DMA areas below 4G.
340 - */
341365 ZONE_DMA32,
342366 #endif
343367 /*
....@@ -357,6 +381,41 @@
357381 */
358382 ZONE_HIGHMEM,
359383 #endif
384 + /*
385 + * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains
386 + * movable pages with few exceptional cases described below. Main use
387 + * cases for ZONE_MOVABLE are to make memory offlining/unplug more
388 + * likely to succeed, and to locally limit unmovable allocations - e.g.,
389 + * to increase the number of THP/huge pages. Notable special cases are:
390 + *
391 + * 1. Pinned pages: (long-term) pinning of movable pages might
392 + * essentially turn such pages unmovable. Memory offlining might
393 + * retry a long time.
394 + * 2. memblock allocations: kernelcore/movablecore setups might create
395 + * situations where ZONE_MOVABLE contains unmovable allocations
396 + * after boot. Memory offlining and allocations fail early.
397 + * 3. Memory holes: kernelcore/movablecore setups might create very rare
398 + * situations where ZONE_MOVABLE contains memory holes after boot,
399 + * for example, if we have sections that are only partially
400 + * populated. Memory offlining and allocations fail early.
401 + * 4. PG_hwpoison pages: while poisoned pages can be skipped during
402 + * memory offlining, such pages cannot be allocated.
403 + * 5. Unmovable PG_offline pages: in paravirtualized environments,
404 + * hotplugged memory blocks might only partially be managed by the
405 + * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The
406 + * parts not managed by the buddy are unmovable PG_offline pages. In
407 + * some cases (virtio-mem), such pages can be skipped during
408 + * memory offlining, however, cannot be moved/allocated. These
409 + * techniques might use alloc_contig_range() to hide previously
410 + * exposed pages from the buddy again (e.g., to implement some sort
411 + * of memory unplug in virtio-mem).
412 + *
413 + * In general, no unmovable allocations that degrade memory offlining
414 + * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range())
415 + * have to expect that migrating pages in ZONE_MOVABLE can fail (even
416 + * if has_unmovable_pages() states that there are no unmovable pages,
417 + * there can be false negatives).
418 + */
360419 ZONE_MOVABLE,
361420 #ifdef CONFIG_ZONE_DEVICE
362421 ZONE_DEVICE,
....@@ -367,11 +426,14 @@
367426
368427 #ifndef __GENERATING_BOUNDS_H
369428
429 +#define ASYNC_AND_SYNC 2
430 +
370431 struct zone {
371432 /* Read-mostly fields */
372433
373434 /* zone watermarks, access with *_wmark_pages(zone) macros */
374 - unsigned long watermark[NR_WMARK];
435 + unsigned long _watermark[NR_WMARK];
436 + unsigned long watermark_boost;
375437
376438 unsigned long nr_reserved_highatomic;
377439
....@@ -386,15 +448,11 @@
386448 */
387449 long lowmem_reserve[MAX_NR_ZONES];
388450
389 -#ifdef CONFIG_NUMA
451 +#ifdef CONFIG_NEED_MULTIPLE_NODES
390452 int node;
391453 #endif
392454 struct pglist_data *zone_pgdat;
393455 struct per_cpu_pageset __percpu *pageset;
394 -
395 -#ifdef CONFIG_CMA
396 - bool cma_alloc;
397 -#endif
398456
399457 #ifndef CONFIG_SPARSEMEM
400458 /*
....@@ -421,6 +479,9 @@
421479 * bootmem allocator):
422480 * managed_pages = present_pages - reserved_pages;
423481 *
482 + * cma pages is present pages that are assigned for CMA use
483 + * (MIGRATE_CMA).
484 + *
424485 * So present_pages may be used by memory hotplug or memory power
425486 * management logic to figure out unmanaged pages by checking
426487 * (present_pages - managed_pages). And managed_pages should be used
....@@ -441,16 +502,13 @@
441502 * Write access to present_pages at runtime should be protected by
442503 * mem_hotplug_begin/end(). Any reader who can't tolerant drift of
443504 * present_pages should get_online_mems() to get a stable value.
444 - *
445 - * Read access to managed_pages should be safe because it's unsigned
446 - * long. Write access to zone->managed_pages and totalram_pages are
447 - * protected by managed_page_count_lock at runtime. Idealy only
448 - * adjust_managed_page_count() should be used instead of directly
449 - * touching zone->managed_pages and totalram_pages.
450505 */
451 - unsigned long managed_pages;
506 + atomic_long_t managed_pages;
452507 unsigned long spanned_pages;
453508 unsigned long present_pages;
509 +#ifdef CONFIG_CMA
510 + unsigned long cma_pages;
511 +#endif
454512
455513 const char *name;
456514
....@@ -495,8 +553,10 @@
495553 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
496554 /* pfn where compaction free scanner should start */
497555 unsigned long compact_cached_free_pfn;
498 - /* pfn where async and sync compaction migration scanner should start */
499 - unsigned long compact_cached_migrate_pfn[2];
556 + /* pfn where compaction migration scanner should start */
557 + unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC];
558 + unsigned long compact_init_migrate_pfn;
559 + unsigned long compact_init_free_pfn;
500560 #endif
501561
502562 #ifdef CONFIG_COMPACTION
....@@ -504,6 +564,7 @@
504564 * On compaction failure, 1<<compact_defer_shift compactions
505565 * are skipped before trying again. The number attempted since
506566 * last failure is tracked with compact_considered.
567 + * compact_order_failed is the minimum compaction failed order.
507568 */
508569 unsigned int compact_considered;
509570 unsigned int compact_defer_shift;
....@@ -529,9 +590,6 @@
529590 } ____cacheline_internodealigned_in_smp;
530591
531592 enum pgdat_flags {
532 - PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
533 - * a congested BDI
534 - */
535593 PGDAT_DIRTY, /* reclaim scanning has recently found
536594 * many dirty file pages at the tail
537595 * of the LRU.
....@@ -541,6 +599,26 @@
541599 */
542600 PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
543601 };
602 +
603 +enum zone_flags {
604 + ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks.
605 + * Cleared when kswapd is woken.
606 + */
607 +};
608 +
609 +static inline unsigned long zone_managed_pages(struct zone *zone)
610 +{
611 + return (unsigned long)atomic_long_read(&zone->managed_pages);
612 +}
613 +
614 +static inline unsigned long zone_cma_pages(struct zone *zone)
615 +{
616 +#ifdef CONFIG_CMA
617 + return zone->cma_pages;
618 +#else
619 + return 0;
620 +#endif
621 +}
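The accessors above hide both the atomic_long_t conversion and the CONFIG_CMA ifdef from callers. One plausible use, sketched here rather than quoted from the patch, is computing the managed pages of a zone that are not set aside for CMA:

/* Sketch: managed pages excluding the zone's CMA region. */
static unsigned long zone_managed_noncma_pages(struct zone *zone)
{
        return zone_managed_pages(zone) - zone_cma_pages(zone);
}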
544622
545623 static inline unsigned long zone_end_pfn(const struct zone *zone)
546624 {
....@@ -632,6 +710,14 @@
632710 extern struct page *mem_map;
633711 #endif
634712
713 +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
714 +struct deferred_split {
715 + spinlock_t split_queue_lock;
716 + struct list_head split_queue;
717 + unsigned long split_queue_len;
718 +};
719 +#endif
720 +
635721 /*
636722 * On NUMA machines, each NUMA node would have a pg_data_t to describe
637723 * it's memory layout. On UMA machines there is a single pglist_data which
....@@ -640,24 +726,32 @@
640726 * Memory statistics and page replacement data structures are maintained on a
641727 * per-zone basis.
642728 */
643 -struct bootmem_data;
644729 typedef struct pglist_data {
730 + /*
731 + * node_zones contains just the zones for THIS node. Not all of the
732 + * zones may be populated, but it is the full list. It is referenced by
733 + * this node's node_zonelists as well as other node's node_zonelists.
734 + */
645735 struct zone node_zones[MAX_NR_ZONES];
736 +
737 + /*
738 + * node_zonelists contains references to all zones in all nodes.
739 + * Generally the first zones will be references to this node's
740 + * node_zones.
741 + */
646742 struct zonelist node_zonelists[MAX_ZONELISTS];
647 - int nr_zones;
743 +
744 + int nr_zones; /* number of populated zones in this node */
648745 #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
649746 struct page *node_mem_map;
650747 #ifdef CONFIG_PAGE_EXTENSION
651748 struct page_ext *node_page_ext;
652749 #endif
653750 #endif
654 -#ifndef CONFIG_NO_BOOTMEM
655 - struct bootmem_data *bdata;
656 -#endif
657751 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
658752 /*
659 - * Must be held any time you expect node_start_pfn, node_present_pages
660 - * or node_spanned_pages stay constant.
753 + * Must be held any time you expect node_start_pfn,
754 + * node_present_pages, node_spanned_pages or nr_zones to stay constant.
661755 * Also synchronizes pgdat->first_deferred_pfn during deferred page
662756 * init.
663757 *
....@@ -678,16 +772,19 @@
678772 wait_queue_head_t pfmemalloc_wait;
679773 struct task_struct *kswapd; /* Protected by
680774 mem_hotplug_begin/end() */
775 + struct task_struct *mkswapd[MAX_KSWAPD_THREADS];
681776 int kswapd_order;
682 - enum zone_type kswapd_classzone_idx;
777 + enum zone_type kswapd_highest_zoneidx;
683778
684779 int kswapd_failures; /* Number of 'reclaimed == 0' runs */
685780
781 + ANDROID_OEM_DATA(1);
686782 #ifdef CONFIG_COMPACTION
687783 int kcompactd_max_order;
688 - enum zone_type kcompactd_classzone_idx;
784 + enum zone_type kcompactd_highest_zoneidx;
689785 wait_queue_head_t kcompactd_wait;
690786 struct task_struct *kcompactd;
787 + bool proactive_compact_trigger;
691788 #endif
692789 /*
693790 * This is a per-node reserve of pages that are not available
....@@ -697,7 +794,7 @@
697794
698795 #ifdef CONFIG_NUMA
699796 /*
700 - * zone reclaim becomes active if more unmapped pages exist.
797 + * node reclaim becomes active if more unmapped pages exist.
701798 */
702799 unsigned long min_unmapped_pages;
703800 unsigned long min_slab_pages;
....@@ -713,18 +810,20 @@
713810 * is the first PFN that needs to be initialised.
714811 */
715812 unsigned long first_deferred_pfn;
716 - /* Number of non-deferred pages */
717 - unsigned long static_init_pgcnt;
718813 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
719814
720815 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
721 - spinlock_t split_queue_lock;
722 - struct list_head split_queue;
723 - unsigned long split_queue_len;
816 + struct deferred_split deferred_split_queue;
724817 #endif
725818
726819 /* Fields commonly accessed by the page reclaim scanner */
727 - struct lruvec lruvec;
820 +
821 + /*
822 + * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
823 + *
824 + * Use mem_cgroup_lruvec() to look up lruvecs.
825 + */
826 + struct lruvec __lruvec;
728827
729828 unsigned long flags;
730829
....@@ -746,15 +845,6 @@
746845
747846 #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
748847 #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
749 -static inline spinlock_t *zone_lru_lock(struct zone *zone)
750 -{
751 - return &zone->zone_pgdat->lru_lock;
752 -}
753 -
754 -static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
755 -{
756 - return &pgdat->lruvec;
757 -}
758848
759849 static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
760850 {
....@@ -770,15 +860,15 @@
770860
771861 void build_all_zonelists(pg_data_t *pgdat);
772862 void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
773 - enum zone_type classzone_idx);
863 + enum zone_type highest_zoneidx);
774864 bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
775 - int classzone_idx, unsigned int alloc_flags,
865 + int highest_zoneidx, unsigned int alloc_flags,
776866 long free_pages);
777867 bool zone_watermark_ok(struct zone *z, unsigned int order,
778 - unsigned long mark, int classzone_idx,
868 + unsigned long mark, int highest_zoneidx,
779869 unsigned int alloc_flags);
780870 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
781 - unsigned long mark, int classzone_idx);
871 + unsigned long mark, int highest_zoneidx);
782872 /*
783873 * Memory initialization context, use to differentiate memory added by
784874 * the platform statically or via memory hotplug interface.
....@@ -798,17 +888,11 @@
798888 #ifdef CONFIG_MEMCG
799889 return lruvec->pgdat;
800890 #else
801 - return container_of(lruvec, struct pglist_data, lruvec);
891 + return container_of(lruvec, struct pglist_data, __lruvec);
802892 #endif
803893 }
804894
805895 extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
806 -
807 -#ifdef CONFIG_HAVE_MEMORY_PRESENT
808 -void memory_present(int nid, unsigned long start, unsigned long end);
809 -#else
810 -static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
811 -#endif
812896
813897 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
814898 int local_memory_node(int node_id);
....@@ -821,18 +905,6 @@
821905 */
822906 #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
823907
824 -#ifdef CONFIG_ZONE_DEVICE
825 -static inline bool is_dev_zone(const struct zone *zone)
826 -{
827 - return zone_idx(zone) == ZONE_DEVICE;
828 -}
829 -#else
830 -static inline bool is_dev_zone(const struct zone *zone)
831 -{
832 - return false;
833 -}
834 -#endif
835 -
836908 /*
837909 * Returns true if a zone has pages managed by the buddy allocator.
838910 * All the reclaim decisions have to use this function rather than
....@@ -841,7 +913,7 @@
841913 */
842914 static inline bool managed_zone(struct zone *zone)
843915 {
844 - return zone->managed_pages;
916 + return zone_managed_pages(zone);
845917 }
846918
847919 /* Returns true if a zone has memory */
....@@ -850,7 +922,7 @@
850922 return zone->present_pages;
851923 }
852924
853 -#ifdef CONFIG_NUMA
925 +#ifdef CONFIG_NEED_MULTIPLE_NODES
854926 static inline int zone_to_nid(struct zone *zone)
855927 {
856928 return zone->node;
....@@ -874,7 +946,7 @@
874946 #ifdef CONFIG_HIGHMEM
875947 static inline int zone_movable_is_highmem(void)
876948 {
877 -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
949 +#ifdef CONFIG_NEED_MULTIPLE_NODES
878950 return movable_zone == ZONE_HIGHMEM;
879951 #else
880952 return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
....@@ -892,8 +964,17 @@
892964 #endif
893965 }
894966
967 +#ifdef CONFIG_ZONE_DMA
968 +bool has_managed_dma(void);
969 +#else
970 +static inline bool has_managed_dma(void)
971 +{
972 + return false;
973 +}
974 +#endif
975 +
895976 /**
896 - * is_highmem - helper function to quickly check if a struct zone is a
977 + * is_highmem - helper function to quickly check if a struct zone is a
897978 * highmem zone or not. This is an attempt to keep references
898979 * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
899980 * @zone - pointer to struct zone variable
....@@ -909,22 +990,23 @@
909990
910991 /* These two functions are used to setup the per zone pages min values */
911992 struct ctl_table;
912 -int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
913 - void __user *, size_t *, loff_t *);
914 -int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
915 - void __user *, size_t *, loff_t *);
916 -extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
917 -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
918 - void __user *, size_t *, loff_t *);
919 -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
920 - void __user *, size_t *, loff_t *);
921 -int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
922 - void __user *, size_t *, loff_t *);
923 -int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
924 - void __user *, size_t *, loff_t *);
925993
926 -extern int numa_zonelist_order_handler(struct ctl_table *, int,
927 - void __user *, size_t *, loff_t *);
994 +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
995 + loff_t *);
996 +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
997 + size_t *, loff_t *);
998 +extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
999 +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
1000 + size_t *, loff_t *);
1001 +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
1002 + void *, size_t *, loff_t *);
1003 +int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
1004 + void *, size_t *, loff_t *);
1005 +int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
1006 + void *, size_t *, loff_t *);
1007 +int numa_zonelist_order_handler(struct ctl_table *, int,
1008 + void *, size_t *, loff_t *);
1009 +extern int percpu_pagelist_fraction;
9281010 extern char numa_zonelist_order[];
9291011 #define NUMA_ZONELIST_ORDER_LEN 16
9301012
....@@ -943,6 +1025,7 @@
9431025 extern struct pglist_data *first_online_pgdat(void);
9441026 extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
9451027 extern struct zone *next_zone(struct zone *zone);
1028 +extern int isolate_anon_lru_page(struct page *page);
9461029
9471030 /**
9481031 * for_each_online_pgdat - helper macro to iterate over all online nodes
....@@ -1039,7 +1122,7 @@
10391122 /**
10401123 * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
10411124 * @zone - The current zone in the iterator
1042 - * @z - The current pointer within zonelist->zones being iterated
1125 + * @z - The current pointer within zonelist->_zonerefs being iterated
10431126 * @zlist - The zonelist being iterated
10441127 * @highidx - The zone index of the highest zone to return
10451128 * @nodemask - Nodemask allowed by the allocator
....@@ -1053,7 +1136,7 @@
10531136 z = next_zones_zonelist(++z, highidx, nodemask), \
10541137 zone = zonelist_zone(z))
10551138
1056 -#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
1139 +#define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \
10571140 for (zone = z->zone; \
10581141 zone; \
10591142 z = next_zones_zonelist(++z, highidx, nodemask), \
....@@ -1074,15 +1157,6 @@
10741157
10751158 #ifdef CONFIG_SPARSEMEM
10761159 #include <asm/sparsemem.h>
1077 -#endif
1078 -
1079 -#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
1080 - !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
1081 -static inline unsigned long early_pfn_to_nid(unsigned long pfn)
1082 -{
1083 - BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
1084 - return 0;
1085 -}
10861160 #endif
10871161
10881162 #ifdef CONFIG_FLATMEM
....@@ -1124,6 +1198,32 @@
11241198 #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
11251199 #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
11261200
1201 +#define SUBSECTION_SHIFT 21
1202 +#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
1203 +
1204 +#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
1205 +#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
1206 +#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1))
1207 +
1208 +#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
1209 +#error Subsection size exceeds section size
1210 +#else
1211 +#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
1212 +#endif
1213 +
1214 +#define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION)
1215 +#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
1216 +
1217 +struct mem_section_usage {
1218 +#ifdef CONFIG_SPARSEMEM_VMEMMAP
1219 + DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
1220 +#endif
1221 + /* See declaration of similar field in struct zone */
1222 + unsigned long pageblock_flags[0];
1223 +};
1224 +
1225 +void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
1226 +
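To make the subsection constants above concrete, here is the arithmetic under one common configuration (PAGE_SHIFT = 12 and SECTION_SIZE_BITS = 27, i.e. 128 MiB sections as on x86_64; other architectures use different values):

/*
 * SUBSECTION_SIZE          = 1UL << 21        = 2 MiB
 * PAGES_PER_SUBSECTION     = 1UL << (21 - 12) = 512 pages
 * SUBSECTIONS_PER_SECTION  = 1UL << (27 - 21) = 64
 * so each 128 MiB section is tracked as 64 independently mappable 2 MiB
 * subsections in mem_section_usage::subsection_map.
 */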
11271227 struct page;
11281228 struct page_ext;
11291229 struct mem_section {
....@@ -1141,8 +1241,7 @@
11411241 */
11421242 unsigned long section_mem_map;
11431243
1144 - /* See declaration of similar field in struct zone */
1145 - unsigned long *pageblock_flags;
1244 + struct mem_section_usage *usage;
11461245 #ifdef CONFIG_PAGE_EXTENSION
11471246 /*
11481247 * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
....@@ -1173,18 +1272,26 @@
11731272 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
11741273 #endif
11751274
1275 +static inline unsigned long *section_to_usemap(struct mem_section *ms)
1276 +{
1277 + return ms->usage->pageblock_flags;
1278 +}
1279 +
11761280 static inline struct mem_section *__nr_to_section(unsigned long nr)
11771281 {
1282 + unsigned long root = SECTION_NR_TO_ROOT(nr);
1283 +
1284 + if (unlikely(root >= NR_SECTION_ROOTS))
1285 + return NULL;
1286 +
11781287 #ifdef CONFIG_SPARSEMEM_EXTREME
1179 - if (!mem_section)
1288 + if (!mem_section || !mem_section[root])
11801289 return NULL;
11811290 #endif
1182 - if (!mem_section[SECTION_NR_TO_ROOT(nr)])
1183 - return NULL;
1184 - return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
1291 + return &mem_section[root][nr & SECTION_ROOT_MASK];
11851292 }
1186 -extern int __section_nr(struct mem_section* ms);
1187 -extern unsigned long usemap_size(void);
1293 +extern unsigned long __section_nr(struct mem_section *ms);
1294 +extern size_t mem_section_usage_size(void);
11881295
11891296 /*
11901297 * We use the lower bits of the mem_map pointer to store
....@@ -1202,7 +1309,8 @@
12021309 #define SECTION_MARKED_PRESENT (1UL<<0)
12031310 #define SECTION_HAS_MEM_MAP (1UL<<1)
12041311 #define SECTION_IS_ONLINE (1UL<<2)
1205 -#define SECTION_MAP_LAST_BIT (1UL<<3)
1312 +#define SECTION_IS_EARLY (1UL<<3)
1313 +#define SECTION_MAP_LAST_BIT (1UL<<4)
12061314 #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
12071315 #define SECTION_NID_SHIFT 3
12081316
....@@ -1226,6 +1334,11 @@
12261334 static inline int valid_section(struct mem_section *section)
12271335 {
12281336 return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
1337 +}
1338 +
1339 +static inline int early_section(struct mem_section *section)
1340 +{
1341 + return (section && (section->section_mem_map & SECTION_IS_EARLY));
12291342 }
12301343
12311344 static inline int valid_section_nr(unsigned long nr)
....@@ -1255,22 +1368,60 @@
12551368 return __nr_to_section(pfn_to_section_nr(pfn));
12561369 }
12571370
1258 -extern int __highest_present_section_nr;
1371 +extern unsigned long __highest_present_section_nr;
1372 +
1373 +static inline int subsection_map_index(unsigned long pfn)
1374 +{
1375 + return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
1376 +}
1377 +
1378 +#ifdef CONFIG_SPARSEMEM_VMEMMAP
1379 +static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1380 +{
1381 + int idx = subsection_map_index(pfn);
1382 +
1383 + return test_bit(idx, ms->usage->subsection_map);
1384 +}
1385 +#else
1386 +static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1387 +{
1388 + return 1;
1389 +}
1390 +#endif
12591391
12601392 #ifndef CONFIG_HAVE_ARCH_PFN_VALID
12611393 static inline int pfn_valid(unsigned long pfn)
12621394 {
1395 + struct mem_section *ms;
1396 +
12631397 if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
12641398 return 0;
1265 - return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
1399 + ms = __nr_to_section(pfn_to_section_nr(pfn));
1400 + if (!valid_section(ms))
1401 + return 0;
1402 + /*
1403 + * Traditionally early sections always returned pfn_valid() for
1404 + * the entire section-sized span.
1405 + */
1406 + return early_section(ms) || pfn_section_valid(ms, pfn);
12661407 }
12671408 #endif
12681409
1269 -static inline int pfn_present(unsigned long pfn)
1410 +static inline int pfn_in_present_section(unsigned long pfn)
12701411 {
12711412 if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
12721413 return 0;
12731414 return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
1415 +}
1416 +
1417 +static inline unsigned long next_present_section_nr(unsigned long section_nr)
1418 +{
1419 + while (++section_nr <= __highest_present_section_nr) {
1420 + if (present_section_nr(section_nr))
1421 + return section_nr;
1422 + }
1423 +
1424 + return -1;
12741425 }
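next_present_section_nr() above is the building block for walking only the sections that are actually present. A sketch of the iteration pattern it enables (illustrative only, not code from this patch):

/* Sketch: count all present memory sections. */
static unsigned long count_present_sections(void)
{
        unsigned long nr, count = 0;

        for (nr = next_present_section_nr((unsigned long)-1);
             nr != (unsigned long)-1;
             nr = next_present_section_nr(nr))
                count++;

        return count;
}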
12751426
12761427 /*
....@@ -1288,11 +1439,12 @@
12881439 #define pfn_to_nid(pfn) (0)
12891440 #endif
12901441
1291 -#define early_pfn_valid(pfn) pfn_valid(pfn)
12921442 void sparse_init(void);
12931443 #else
12941444 #define sparse_init() do {} while (0)
12951445 #define sparse_index_init(_sec, _nid) do {} while (0)
1446 +#define pfn_in_present_section pfn_valid
1447 +#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
12961448 #endif /* CONFIG_SPARSEMEM */
12971449
12981450 /*
....@@ -1306,15 +1458,9 @@
13061458 int last_nid;
13071459 };
13081460
1309 -#ifndef early_pfn_valid
1310 -#define early_pfn_valid(pfn) (1)
1311 -#endif
1312 -
1313 -void memory_present(int nid, unsigned long start, unsigned long end);
1314 -
13151461 /*
13161462 * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
1317 - * need to check pfn validility within that MAX_ORDER_NR_PAGES block.
1463 + * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
13181464 * pfn_valid_within() should be used in this case; we optimise this away
13191465 * when we have no holes within a MAX_ORDER_NR_PAGES block.
13201466 */
....@@ -1323,37 +1469,6 @@
13231469 #else
13241470 #define pfn_valid_within(pfn) (1)
13251471 #endif
1326 -
1327 -#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
1328 -/*
1329 - * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
1330 - * associated with it or not. This means that a struct page exists for this
1331 - * pfn. The caller cannot assume the page is fully initialized in general.
1332 - * Hotplugable pages might not have been onlined yet. pfn_to_online_page()
1333 - * will ensure the struct page is fully online and initialized. Special pages
1334 - * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
1335 - *
1336 - * In FLATMEM, it is expected that holes always have valid memmap as long as
1337 - * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
1338 - * that a valid section has a memmap for the entire section.
1339 - *
1340 - * However, an ARM, and maybe other embedded architectures in the future
1341 - * free memmap backing holes to save memory on the assumption the memmap is
1342 - * never used. The page_zone linkages are then broken even though pfn_valid()
1343 - * returns true. A walker of the full memmap must then do this additional
1344 - * check to ensure the memmap they are looking at is sane by making sure
1345 - * the zone and PFN linkages are still valid. This is expensive, but walkers
1346 - * of the full memmap are extremely rare.
1347 - */
1348 -bool memmap_valid_within(unsigned long pfn,
1349 - struct page *page, struct zone *zone);
1350 -#else
1351 -static inline bool memmap_valid_within(unsigned long pfn,
1352 - struct page *page, struct zone *zone)
1353 -{
1354 - return true;
1355 -}
1356 -#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
13571472
13581473 #endif /* !__GENERATING_BOUNDS.H */
13591474 #endif /* !__ASSEMBLY__ */