~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,23 +1,14 @@
	1	+// SPDX-License-Identifier: GPL-2.0-or-later
1	2	/*
2	3	* Copyright 2013 Red Hat Inc.
3	4	*
4		- * This program is free software; you can redistribute it and/or modify
5		- * it under the terms of the GNU General Public License as published by
6		- * the Free Software Foundation; either version 2 of the License, or
7		- * (at your option) any later version.
8		- *
9		- * This program is distributed in the hope that it will be useful,
10		- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11		- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		- * GNU General Public License for more details.
13		- *
14		- * Authors: JÃ©rÃ´me Glisse <jglisse@redhat.com>
	5	+ * Authors: Jérôme Glisse <jglisse@redhat.com>
15	6	*/
16	7	/*
17	8	* Refer to include/linux/hmm.h for information about heterogeneous memory
18	9	* management or HMM for short.
19	10	*/
20		-#include <linux/mm.h>
	11	+#include <linux/pagewalk.h>
21	12	#include <linux/hmm.h>
22	13	#include <linux/init.h>
23	14	#include <linux/rmap.h>
..	..	@@ -29,545 +20,300 @@
29	20	#include <linux/swapops.h>
30	21	#include <linux/hugetlb.h>
31	22	#include <linux/memremap.h>
	23	+#include <linux/sched/mm.h>
32	24	#include <linux/jump_label.h>
	25	+#include <linux/dma-mapping.h>
33	26	#include <linux/mmu_notifier.h>
34	27	#include <linux/memory_hotplug.h>
35		-
36		-#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
37		-
38		-#if IS_ENABLED(CONFIG_HMM_MIRROR)
39		-static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
40		-
41		-/*
42		- * struct hmm - HMM per mm struct
43		- *
44		- * @mm: mm struct this HMM struct is bound to
45		- * @lock: lock protecting ranges list
46		- * @sequence: we track updates to the CPU page table with a sequence number
47		- * @ranges: list of range being snapshotted
48		- * @mirrors: list of mirrors for this mm
49		- * @mmu_notifier: mmu notifier to track updates to CPU page table
50		- * @mirrors_sem: read/write semaphore protecting the mirrors list
51		- */
52		-struct hmm {
53		- struct mm_struct *mm;
54		- spinlock_t lock;
55		- atomic_t sequence;
56		- struct list_head ranges;
57		- struct list_head mirrors;
58		- struct mmu_notifier mmu_notifier;
59		- struct rw_semaphore mirrors_sem;
60		-};
61		-
62		-/*
63		- * hmm_register - register HMM against an mm (HMM internal)
64		- *
65		- * @mm: mm struct to attach to
66		- *
67		- * This is not intended to be used directly by device drivers. It allocates an
68		- * HMM struct if mm does not have one, and initializes it.
69		- */
70		-static struct hmm hmm_register(struct mm_struct mm)
71		-{
72		- struct hmm *hmm = READ_ONCE(mm->hmm);
73		- bool cleanup = false;
74		-
75		- /*
76		- * The hmm struct can only be freed once the mm_struct goes away,
77		- * hence we should always have pre-allocated an new hmm struct
78		- * above.
79		- */
80		- if (hmm)
81		- return hmm;
82		-
83		- hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
84		- if (!hmm)
85		- return NULL;
86		- INIT_LIST_HEAD(&hmm->mirrors);
87		- init_rwsem(&hmm->mirrors_sem);
88		- atomic_set(&hmm->sequence, 0);
89		- hmm->mmu_notifier.ops = NULL;
90		- INIT_LIST_HEAD(&hmm->ranges);
91		- spin_lock_init(&hmm->lock);
92		- hmm->mm = mm;
93		-
94		- spin_lock(&mm->page_table_lock);
95		- if (!mm->hmm)
96		- mm->hmm = hmm;
97		- else
98		- cleanup = true;
99		- spin_unlock(&mm->page_table_lock);
100		-
101		- if (cleanup)
102		- goto error;
103		-
104		- /*
105		- * We should only get here if hold the mmap_sem in write mode ie on
106		- * registration of first mirror through hmm_mirror_register()
107		- */
108		- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
109		- if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
110		- goto error_mm;
111		-
112		- return mm->hmm;
113		-
114		-error_mm:
115		- spin_lock(&mm->page_table_lock);
116		- if (mm->hmm == hmm)
117		- mm->hmm = NULL;
118		- spin_unlock(&mm->page_table_lock);
119		-error:
120		- kfree(hmm);
121		- return NULL;
122		-}
123		-
124		-void hmm_mm_destroy(struct mm_struct *mm)
125		-{
126		- kfree(mm->hmm);
127		-}
128		-
129		-static void hmm_invalidate_range(struct hmm *hmm,
130		- enum hmm_update_type action,
131		- unsigned long start,
132		- unsigned long end)
133		-{
134		- struct hmm_mirror *mirror;
135		- struct hmm_range *range;
136		-
137		- spin_lock(&hmm->lock);
138		- list_for_each_entry(range, &hmm->ranges, list) {
139		- unsigned long addr, idx, npages;
140		-
141		- if (end < range->start \|\| start >= range->end)
142		- continue;
143		-
144		- range->valid = false;
145		- addr = max(start, range->start);
146		- idx = (addr - range->start) >> PAGE_SHIFT;
147		- npages = (min(range->end, end) - addr) >> PAGE_SHIFT;
148		- memset(&range->pfns[idx], 0, sizeof(range->pfns) npages);
149		- }
150		- spin_unlock(&hmm->lock);
151		-
152		- down_read(&hmm->mirrors_sem);
153		- list_for_each_entry(mirror, &hmm->mirrors, list)
154		- mirror->ops->sync_cpu_device_pagetables(mirror, action,
155		- start, end);
156		- up_read(&hmm->mirrors_sem);
157		-}
158		-
159		-static void hmm_release(struct mmu_notifier mn, struct mm_struct mm)
160		-{
161		- struct hmm_mirror *mirror;
162		- struct hmm *hmm = mm->hmm;
163		-
164		- down_write(&hmm->mirrors_sem);
165		- mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
166		- list);
167		- while (mirror) {
168		- list_del_init(&mirror->list);
169		- if (mirror->ops->release) {
170		- /*
171		- * Drop mirrors_sem so callback can wait on any pending
172		- * work that might itself trigger mmu_notifier callback
173		- * and thus would deadlock with us.
174		- */
175		- up_write(&hmm->mirrors_sem);
176		- mirror->ops->release(mirror);
177		- down_write(&hmm->mirrors_sem);
178		- }
179		- mirror = list_first_entry_or_null(&hmm->mirrors,
180		- struct hmm_mirror, list);
181		- }
182		- up_write(&hmm->mirrors_sem);
183		-}
184		-
185		-static int hmm_invalidate_range_start(struct mmu_notifier *mn,
186		- struct mm_struct *mm,
187		- unsigned long start,
188		- unsigned long end,
189		- bool blockable)
190		-{
191		- struct hmm *hmm = mm->hmm;
192		-
193		- VM_BUG_ON(!hmm);
194		-
195		- atomic_inc(&hmm->sequence);
196		-
197		- return 0;
198		-}
199		-
200		-static void hmm_invalidate_range_end(struct mmu_notifier *mn,
201		- struct mm_struct *mm,
202		- unsigned long start,
203		- unsigned long end)
204		-{
205		- struct hmm *hmm = mm->hmm;
206		-
207		- VM_BUG_ON(!hmm);
208		-
209		- hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end);
210		-}
211		-
212		-static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
213		- .release = hmm_release,
214		- .invalidate_range_start = hmm_invalidate_range_start,
215		- .invalidate_range_end = hmm_invalidate_range_end,
216		-};
217		-
218		-/*
219		- * hmm_mirror_register() - register a mirror against an mm
220		- *
221		- * @mirror: new mirror struct to register
222		- * @mm: mm to register against
223		- *
224		- * To start mirroring a process address space, the device driver must register
225		- * an HMM mirror struct.
226		- *
227		- * THE mm->mmap_sem MUST BE HELD IN WRITE MODE !
228		- */
229		-int hmm_mirror_register(struct hmm_mirror mirror, struct mm_struct mm)
230		-{
231		- /* Sanity check */
232		- if (!mm \|\| !mirror \|\| !mirror->ops)
233		- return -EINVAL;
234		-
235		-again:
236		- mirror->hmm = hmm_register(mm);
237		- if (!mirror->hmm)
238		- return -ENOMEM;
239		-
240		- down_write(&mirror->hmm->mirrors_sem);
241		- if (mirror->hmm->mm == NULL) {
242		- /*
243		- * A racing hmm_mirror_unregister() is about to destroy the hmm
244		- * struct. Try again to allocate a new one.
245		- */
246		- up_write(&mirror->hmm->mirrors_sem);
247		- mirror->hmm = NULL;
248		- goto again;
249		- } else {
250		- list_add(&mirror->list, &mirror->hmm->mirrors);
251		- up_write(&mirror->hmm->mirrors_sem);
252		- }
253		-
254		- return 0;
255		-}
256		-EXPORT_SYMBOL(hmm_mirror_register);
257		-
258		-/*
259		- * hmm_mirror_unregister() - unregister a mirror
260		- *
261		- * @mirror: new mirror struct to register
262		- *
263		- * Stop mirroring a process address space, and cleanup.
264		- */
265		-void hmm_mirror_unregister(struct hmm_mirror *mirror)
266		-{
267		- bool should_unregister = false;
268		- struct mm_struct *mm;
269		- struct hmm *hmm;
270		-
271		- if (mirror->hmm == NULL)
272		- return;
273		-
274		- hmm = mirror->hmm;
275		- down_write(&hmm->mirrors_sem);
276		- list_del_init(&mirror->list);
277		- should_unregister = list_empty(&hmm->mirrors);
278		- mirror->hmm = NULL;
279		- mm = hmm->mm;
280		- hmm->mm = NULL;
281		- up_write(&hmm->mirrors_sem);
282		-
283		- if (!should_unregister \|\| mm == NULL)
284		- return;
285		-
286		- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
287		-
288		- spin_lock(&mm->page_table_lock);
289		- if (mm->hmm == hmm)
290		- mm->hmm = NULL;
291		- spin_unlock(&mm->page_table_lock);
292		-
293		- kfree(hmm);
294		-}
295		-EXPORT_SYMBOL(hmm_mirror_unregister);
296	28
297	29	struct hmm_vma_walk {
298	30	struct hmm_range *range;
299	31	unsigned long last;
300		- bool fault;
301		- bool block;
302	32	};
303	33
304		-static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
305		- bool write_fault, uint64_t *pfn)
	34	+enum {
	35	+ HMM_NEED_FAULT = 1 << 0,
	36	+ HMM_NEED_WRITE_FAULT = 1 << 1,
	37	+ HMM_NEED_ALL_BITS = HMM_NEED_FAULT \| HMM_NEED_WRITE_FAULT,
	38	+};
	39	+
	40	+static int hmm_pfns_fill(unsigned long addr, unsigned long end,
	41	+ struct hmm_range *range, unsigned long cpu_flags)
306	42	{
307		- unsigned int flags = FAULT_FLAG_ALLOW_RETRY \| FAULT_FLAG_REMOTE;
308		- struct hmm_vma_walk *hmm_vma_walk = walk->private;
309		- struct hmm_range *range = hmm_vma_walk->range;
310		- struct vm_area_struct *vma = walk->vma;
311		- vm_fault_t ret;
	43	+ unsigned long i = (addr - range->start) >> PAGE_SHIFT;
312	44
313		- flags \|= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
314		- flags \|= write_fault ? FAULT_FLAG_WRITE : 0;
315		- ret = handle_mm_fault(vma, addr, flags);
316		- if (ret & VM_FAULT_RETRY)
317		- return -EBUSY;
318		- if (ret & VM_FAULT_ERROR) {
319		- *pfn = range->values[HMM_PFN_ERROR];
320		- return -EFAULT;
321		- }
322		-
323		- return -EAGAIN;
324		-}
325		-
326		-static int hmm_pfns_bad(unsigned long addr,
327		- unsigned long end,
328		- struct mm_walk *walk)
329		-{
330		- struct hmm_vma_walk *hmm_vma_walk = walk->private;
331		- struct hmm_range *range = hmm_vma_walk->range;
332		- uint64_t *pfns = range->pfns;
333		- unsigned long i;
334		-
335		- i = (addr - range->start) >> PAGE_SHIFT;
336	45	for (; addr < end; addr += PAGE_SIZE, i++)
337		- pfns[i] = range->values[HMM_PFN_ERROR];
338		-
	46	+ range->hmm_pfns[i] = cpu_flags;
339	47	return 0;
340	48	}
341	49
342	50	/*
343		- * hmm_vma_walk_hole() - handle a range lacking valid pmd or pte(s)
344		- * @start: range virtual start address (inclusive)
	51	+ * hmm_vma_fault() - fault in a range lacking valid pmd or pte(s)
	52	+ * @addr: range virtual start address (inclusive)
345	53	* @end: range virtual end address (exclusive)
346		- * @fault: should we fault or not ?
347		- * @write_fault: write fault ?
	54	+ * @required_fault: HMM_NEED_* flags
348	55	* @walk: mm_walk structure
349		- * Returns: 0 on success, -EAGAIN after page fault, or page fault error
	56	+ * Return: -EBUSY after page fault, or page fault error
350	57	*
351	58	* This function will be called whenever pmd_none() or pte_none() returns true,
352	59	* or whenever there is no page directory covering the virtual address range.
353	60	*/
354		-static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
355		- bool fault, bool write_fault,
356		- struct mm_walk *walk)
	61	+static int hmm_vma_fault(unsigned long addr, unsigned long end,
	62	+ unsigned int required_fault, struct mm_walk *walk)
357	63	{
358	64	struct hmm_vma_walk *hmm_vma_walk = walk->private;
359		- struct hmm_range *range = hmm_vma_walk->range;
360		- uint64_t *pfns = range->pfns;
361		- unsigned long i;
	65	+ struct vm_area_struct *vma = walk->vma;
	66	+ unsigned int fault_flags = FAULT_FLAG_REMOTE;
362	67
	68	+ WARN_ON_ONCE(!required_fault);
363	69	hmm_vma_walk->last = addr;
364		- i = (addr - range->start) >> PAGE_SHIFT;
365		- for (; addr < end; addr += PAGE_SIZE, i++) {
366		- pfns[i] = range->values[HMM_PFN_NONE];
367		- if (fault \|\| write_fault) {
368		- int ret;
369	70
370		- ret = hmm_vma_do_fault(walk, addr, write_fault,
371		- &pfns[i]);
372		- if (ret != -EAGAIN)
373		- return ret;
374		- }
	71	+ if (required_fault & HMM_NEED_WRITE_FAULT) {
	72	+ if (!(vma->vm_flags & VM_WRITE))
	73	+ return -EPERM;
	74	+ fault_flags \|= FAULT_FLAG_WRITE;
375	75	}
376	76
377		- return (fault \|\| write_fault) ? -EAGAIN : 0;
	77	+ for (; addr < end; addr += PAGE_SIZE)
	78	+ if (handle_mm_fault(vma, addr, fault_flags, NULL) &
	79	+ VM_FAULT_ERROR)
	80	+ return -EFAULT;
	81	+ return -EBUSY;
378	82	}
379	83
380		-static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
381		- uint64_t pfns, uint64_t cpu_flags,
382		- bool fault, bool write_fault)
	84	+static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
	85	+ unsigned long pfn_req_flags,
	86	+ unsigned long cpu_flags)
383	87	{
384	88	struct hmm_range *range = hmm_vma_walk->range;
385	89
386		- fault = write_fault = false;
387		- if (!hmm_vma_walk->fault)
388		- return;
	90	+ /*
	91	+ * We consider not only the individual per-page request but also
	92	+ * the default flags requested for the range. The API can
	93	+ * be used 2 ways. The first one where the HMM user coalesces
	94	+ * multiple page faults into one request and sets flags per pfn for
	95	+ * those faults. The second one where the HMM user wants to pre-
	96	+ * fault a range with specific flags. For the latter one it is a
	97	+ * waste to have the user pre-fill the pfn arrays with a default
	98	+ * flags value.
	99	+ */
	100	+ pfn_req_flags &= range->pfn_flags_mask;
	101	+ pfn_req_flags \|= range->default_flags;
389	102
390	103	/* We aren't asked to do anything ... */
391		- if (!(pfns & range->flags[HMM_PFN_VALID]))
392		- return;
393		- /* If this is device memory than only fault if explicitly requested */
394		- if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
395		- /* Do we fault on device memory ? */
396		- if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
397		- *write_fault = pfns & range->flags[HMM_PFN_WRITE];
398		- *fault = true;
399		- }
400		- return;
401		- }
	104	+ if (!(pfn_req_flags & HMM_PFN_REQ_FAULT))
	105	+ return 0;
	106	+
	107	+ /* Need to write fault ? */
	108	+ if ((pfn_req_flags & HMM_PFN_REQ_WRITE) &&
	109	+ !(cpu_flags & HMM_PFN_WRITE))
	110	+ return HMM_NEED_FAULT \| HMM_NEED_WRITE_FAULT;
402	111
403	112	/* If CPU page table is not valid then we need to fault */
404		- *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
405		- /* Need to write fault ? */
406		- if ((pfns & range->flags[HMM_PFN_WRITE]) &&
407		- !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
408		- *write_fault = true;
409		- *fault = true;
410		- }
	113	+ if (!(cpu_flags & HMM_PFN_VALID))
	114	+ return HMM_NEED_FAULT;
	115	+ return 0;
411	116	}
412	117
413		-static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
414		- const uint64_t *pfns, unsigned long npages,
415		- uint64_t cpu_flags, bool *fault,
416		- bool *write_fault)
	118	+static unsigned int
	119	+hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
	120	+ const unsigned long hmm_pfns[], unsigned long npages,
	121	+ unsigned long cpu_flags)
417	122	{
	123	+ struct hmm_range *range = hmm_vma_walk->range;
	124	+ unsigned int required_fault = 0;
418	125	unsigned long i;
419	126
420		- if (!hmm_vma_walk->fault) {
421		- fault = write_fault = false;
422		- return;
423		- }
	127	+ /*
	128	+ * If the default flags do not request to fault pages, and the mask does
	129	+ * not allow for individual pages to be faulted, then
	130	+ * hmm_pte_need_fault() will always return 0.
	131	+ */
	132	+ if (!((range->default_flags \| range->pfn_flags_mask) &
	133	+ HMM_PFN_REQ_FAULT))
	134	+ return 0;
424	135
425	136	for (i = 0; i < npages; ++i) {
426		- hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
427		- fault, write_fault);
428		- if ((fault) \|\| (write_fault))
429		- return;
	137	+ required_fault \|= hmm_pte_need_fault(hmm_vma_walk, hmm_pfns[i],
	138	+ cpu_flags);
	139	+ if (required_fault == HMM_NEED_ALL_BITS)
	140	+ return required_fault;
430	141	}
	142	+ return required_fault;
431	143	}
432	144
433	145	static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
434		- struct mm_walk *walk)
	146	+ __always_unused int depth, struct mm_walk *walk)
435	147	{
436	148	struct hmm_vma_walk *hmm_vma_walk = walk->private;
437	149	struct hmm_range *range = hmm_vma_walk->range;
438		- bool fault, write_fault;
	150	+ unsigned int required_fault;
439	151	unsigned long i, npages;
440		- uint64_t *pfns;
	152	+ unsigned long *hmm_pfns;
441	153
442	154	i = (addr - range->start) >> PAGE_SHIFT;
443	155	npages = (end - addr) >> PAGE_SHIFT;
444		- pfns = &range->pfns[i];
445		- hmm_range_need_fault(hmm_vma_walk, pfns, npages,
446		- 0, &fault, &write_fault);
447		- return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
	156	+ hmm_pfns = &range->hmm_pfns[i];
	157	+ required_fault =
	158	+ hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0);
	159	+ if (!walk->vma) {
	160	+ if (required_fault)
	161	+ return -EFAULT;
	162	+ return hmm_pfns_fill(addr, end, range, HMM_PFN_ERROR);
	163	+ }
	164	+ if (required_fault)
	165	+ return hmm_vma_fault(addr, end, required_fault, walk);
	166	+ return hmm_pfns_fill(addr, end, range, 0);
448	167	}
449	168
450		-static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
	169	+static inline unsigned long hmm_pfn_flags_order(unsigned long order)
	170	+{
	171	+ return order << HMM_PFN_ORDER_SHIFT;
	172	+}
	173	+
	174	+static inline unsigned long pmd_to_hmm_pfn_flags(struct hmm_range *range,
	175	+ pmd_t pmd)
451	176	{
452	177	if (pmd_protnone(pmd))
453	178	return 0;
454		- return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] \|
455		- range->flags[HMM_PFN_WRITE] :
456		- range->flags[HMM_PFN_VALID];
	179	+ return (pmd_write(pmd) ? (HMM_PFN_VALID \| HMM_PFN_WRITE) :
	180	+ HMM_PFN_VALID) \|
	181	+ hmm_pfn_flags_order(PMD_SHIFT - PAGE_SHIFT);
457	182	}
458	183
459		-static int hmm_vma_handle_pmd(struct mm_walk *walk,
460		- unsigned long addr,
461		- unsigned long end,
462		- uint64_t *pfns,
	184	+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	185	+static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
	186	+ unsigned long end, unsigned long hmm_pfns[],
463	187	pmd_t pmd)
464	188	{
465	189	struct hmm_vma_walk *hmm_vma_walk = walk->private;
466	190	struct hmm_range *range = hmm_vma_walk->range;
467	191	unsigned long pfn, npages, i;
468		- bool fault, write_fault;
469		- uint64_t cpu_flags;
	192	+ unsigned int required_fault;
	193	+ unsigned long cpu_flags;
470	194
471	195	npages = (end - addr) >> PAGE_SHIFT;
472	196	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
473		- hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
474		- &fault, &write_fault);
	197	+ required_fault =
	198	+ hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, cpu_flags);
	199	+ if (required_fault)
	200	+ return hmm_vma_fault(addr, end, required_fault, walk);
475	201
476		- if (pmd_protnone(pmd) \|\| fault \|\| write_fault)
477		- return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
478		-
479		- pfn = pmd_pfn(pmd) + pte_index(addr);
	202	+ pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
480	203	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
481		- pfns[i] = hmm_pfn_from_pfn(range, pfn) \| cpu_flags;
482		- hmm_vma_walk->last = end;
	204	+ hmm_pfns[i] = pfn \| cpu_flags;
483	205	return 0;
484	206	}
	207	+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
	208	+/* stub to allow the code below to compile */
	209	+int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
	210	+ unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
	211	+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
485	212
486		-static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
	213	+static inline bool hmm_is_device_private_entry(struct hmm_range *range,
	214	+ swp_entry_t entry)
487	215	{
488		- if (pte_none(pte) \|\| !pte_present(pte))
	216	+ return is_device_private_entry(entry) &&
	217	+ device_private_entry_to_page(entry)->pgmap->owner ==
	218	+ range->dev_private_owner;
	219	+}
	220	+
	221	+static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
	222	+ pte_t pte)
	223	+{
	224	+ if (pte_none(pte) \|\| !pte_present(pte) \|\| pte_protnone(pte))
489	225	return 0;
490		- return pte_write(pte) ? range->flags[HMM_PFN_VALID] \|
491		- range->flags[HMM_PFN_WRITE] :
492		- range->flags[HMM_PFN_VALID];
	226	+ return pte_write(pte) ? (HMM_PFN_VALID \| HMM_PFN_WRITE) : HMM_PFN_VALID;
493	227	}
494	228
495	229	static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
496	230	unsigned long end, pmd_t pmdp, pte_t ptep,
497		- uint64_t *pfn)
	231	+ unsigned long *hmm_pfn)
498	232	{
499	233	struct hmm_vma_walk *hmm_vma_walk = walk->private;
500	234	struct hmm_range *range = hmm_vma_walk->range;
501		- struct vm_area_struct *vma = walk->vma;
502		- bool fault, write_fault;
503		- uint64_t cpu_flags;
	235	+ unsigned int required_fault;
	236	+ unsigned long cpu_flags;
504	237	pte_t pte = *ptep;
505		- uint64_t orig_pfn = *pfn;
506		-
507		- *pfn = range->values[HMM_PFN_NONE];
508		- cpu_flags = pte_to_hmm_pfn_flags(range, pte);
509		- hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
510		- &fault, &write_fault);
	238	+ uint64_t pfn_req_flags = *hmm_pfn;
511	239
512	240	if (pte_none(pte)) {
513		- if (fault \|\| write_fault)
	241	+ required_fault =
	242	+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
	243	+ if (required_fault)
514	244	goto fault;
	245	+ *hmm_pfn = 0;
515	246	return 0;
516	247	}
517	248
518	249	if (!pte_present(pte)) {
519	250	swp_entry_t entry = pte_to_swp_entry(pte);
520	251
521		- if (!non_swap_entry(entry)) {
522		- if (fault \|\| write_fault)
523		- goto fault;
	252	+ /*
	253	+ * Never fault in device private pages, but just report
	254	+ * the PFN even if not present.
	255	+ */
	256	+ if (hmm_is_device_private_entry(range, entry)) {
	257	+ cpu_flags = HMM_PFN_VALID;
	258	+ if (is_write_device_private_entry(entry))
	259	+ cpu_flags \|= HMM_PFN_WRITE;
	260	+ *hmm_pfn = device_private_entry_to_pfn(entry) \|
	261	+ cpu_flags;
524	262	return 0;
525	263	}
526	264
527		- /*
528		- * This is a special swap entry, ignore migration, use
529		- * device and report anything else as error.
530		- */
531		- if (is_device_private_entry(entry)) {
532		- cpu_flags = range->flags[HMM_PFN_VALID] \|
533		- range->flags[HMM_PFN_DEVICE_PRIVATE];
534		- cpu_flags \|= is_write_device_private_entry(entry) ?
535		- range->flags[HMM_PFN_WRITE] : 0;
536		- hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
537		- &fault, &write_fault);
538		- if (fault \|\| write_fault)
539		- goto fault;
540		- *pfn = hmm_pfn_from_pfn(range, swp_offset(entry));
541		- *pfn \|= cpu_flags;
	265	+ required_fault =
	266	+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
	267	+ if (!required_fault) {
	268	+ *hmm_pfn = 0;
542	269	return 0;
543	270	}
	271	+
	272	+ if (!non_swap_entry(entry))
	273	+ goto fault;
544	274
545	275	if (is_migration_entry(entry)) {
546		- if (fault \|\| write_fault) {
547		- pte_unmap(ptep);
548		- hmm_vma_walk->last = addr;
549		- migration_entry_wait(vma->vm_mm,
550		- pmdp, addr);
551		- return -EAGAIN;
552		- }
553		- return 0;
	276	+ pte_unmap(ptep);
	277	+ hmm_vma_walk->last = addr;
	278	+ migration_entry_wait(walk->mm, pmdp, addr);
	279	+ return -EBUSY;
554	280	}
555	281
556	282	/* Report error for everything else */
557		- *pfn = range->values[HMM_PFN_ERROR];
	283	+ pte_unmap(ptep);
558	284	return -EFAULT;
559	285	}
560	286
561		- if (fault \|\| write_fault)
	287	+ cpu_flags = pte_to_hmm_pfn_flags(range, pte);
	288	+ required_fault =
	289	+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
	290	+ if (required_fault)
562	291	goto fault;
563	292
564		- *pfn = hmm_pfn_from_pfn(range, pte_pfn(pte)) \| cpu_flags;
	293	+ /*
	294	+ * Bypass devmap ptes such as DAX pages when all pfn requested
	295	+ * flags (pfn_req_flags) are fulfilled.
	296	+ * Since each architecture defines a struct page for the zero page, just
	297	+ * fall through and treat it like a normal page.
	298	+ */
	299	+ if (!vm_normal_page(walk->vma, addr, pte) &&
	300	+ !pte_devmap(pte) &&
	301	+ !is_zero_pfn(pte_pfn(pte))) {
	302	+ if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
	303	+ pte_unmap(ptep);
	304	+ return -EFAULT;
	305	+ }
	306	+ *hmm_pfn = HMM_PFN_ERROR;
	307	+ return 0;
	308	+ }
	309	+
	310	+ *hmm_pfn = pte_pfn(pte) \| cpu_flags;
565	311	return 0;
566	312
567	313	fault:
568	314	pte_unmap(ptep);
569	315	/* Fault any virtual address we were asked to fault */
570		- return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
	316	+ return hmm_vma_fault(addr, end, required_fault, walk);
571	317	}
572	318
573	319	static int hmm_vma_walk_pmd(pmd_t *pmdp,
..	..	@@ -577,28 +323,40 @@
577	323	{
578	324	struct hmm_vma_walk *hmm_vma_walk = walk->private;
579	325	struct hmm_range *range = hmm_vma_walk->range;
580		- uint64_t *pfns = range->pfns;
581		- unsigned long addr = start, i;
	326	+ unsigned long *hmm_pfns =
	327	+ &range->hmm_pfns[(start - range->start) >> PAGE_SHIFT];
	328	+ unsigned long npages = (end - start) >> PAGE_SHIFT;
	329	+ unsigned long addr = start;
582	330	pte_t *ptep;
583		-
584		- i = (addr - range->start) >> PAGE_SHIFT;
	331	+ pmd_t pmd;
585	332
586	333	again:
587		- if (pmd_none(*pmdp))
588		- return hmm_vma_walk_hole(start, end, walk);
	334	+ pmd = READ_ONCE(*pmdp);
	335	+ if (pmd_none(pmd))
	336	+ return hmm_vma_walk_hole(start, end, -1, walk);
589	337
590		- if (pmd_huge(*pmdp) && (range->vma->vm_flags & VM_HUGETLB))
591		- return hmm_pfns_bad(start, end, walk);
	338	+ if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
	339	+ if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) {
	340	+ hmm_vma_walk->last = addr;
	341	+ pmd_migration_entry_wait(walk->mm, pmdp);
	342	+ return -EBUSY;
	343	+ }
	344	+ return hmm_pfns_fill(start, end, range, 0);
	345	+ }
592	346
593		- if (pmd_devmap(pmdp) \|\| pmd_trans_huge(pmdp)) {
594		- pmd_t pmd;
	347	+ if (!pmd_present(pmd)) {
	348	+ if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
	349	+ return -EFAULT;
	350	+ return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
	351	+ }
595	352
	353	+ if (pmd_devmap(pmd) \|\| pmd_trans_huge(pmd)) {
596	354	/*
597		- * No need to take pmd_lock here, even if some other threads
	355	+ * No need to take pmd_lock here, even if some other thread
598	356	* is splitting the huge pmd we will get that event through
599	357	* mmu_notifier callback.
600	358	*
601		- * So just read pmd value and check again its a transparent
	359	+ * So just read pmd value and check again it's a transparent
602	360	* huge or device mapping one and compute corresponding pfn
603	361	* values.
604	362	*/
..	..	@@ -607,742 +365,235 @@
607	365	if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
608	366	goto again;
609	367
610		- return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
	368	+ return hmm_vma_handle_pmd(walk, addr, end, hmm_pfns, pmd);
611	369	}
612	370
613		- if (pmd_bad(*pmdp))
614		- return hmm_pfns_bad(start, end, walk);
	371	+ /*
	372	+ * We have handled all the valid cases above, i.e. either none, migration,
	373	+ * huge or transparent huge. At this point either it is a valid pmd
	374	+ * entry pointing to pte directory or it is a bad pmd that will not
	375	+ * recover.
	376	+ */
	377	+ if (pmd_bad(pmd)) {
	378	+ if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
	379	+ return -EFAULT;
	380	+ return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
	381	+ }
615	382
616	383	ptep = pte_offset_map(pmdp, addr);
617		- for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
	384	+ for (; addr < end; addr += PAGE_SIZE, ptep++, hmm_pfns++) {
618	385	int r;
619	386
620		- r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
	387	+ r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, hmm_pfns);
621	388	if (r) {
622		- /* hmm_vma_handle_pte() did unmap pte directory */
623		- hmm_vma_walk->last = addr;
	389	+ /* hmm_vma_handle_pte() did pte_unmap() */
624	390	return r;
625	391	}
626	392	}
627	393	pte_unmap(ptep - 1);
628		-
629		- hmm_vma_walk->last = addr;
630	394	return 0;
631	395	}
632	396
633		-static void hmm_pfns_clear(struct hmm_range *range,
634		- uint64_t *pfns,
635		- unsigned long addr,
636		- unsigned long end)
	397	+#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
	398	+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
	399	+static inline unsigned long pud_to_hmm_pfn_flags(struct hmm_range *range,
	400	+ pud_t pud)
637	401	{
638		- for (; addr < end; addr += PAGE_SIZE, pfns++)
639		- *pfns = range->values[HMM_PFN_NONE];
	402	+ if (!pud_present(pud))
	403	+ return 0;
	404	+ return (pud_write(pud) ? (HMM_PFN_VALID \| HMM_PFN_WRITE) :
	405	+ HMM_PFN_VALID) \|
	406	+ hmm_pfn_flags_order(PUD_SHIFT - PAGE_SHIFT);
640	407	}
641	408
642		-static void hmm_pfns_special(struct hmm_range *range)
	409	+static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
	410	+ struct mm_walk *walk)
643	411	{
644		- unsigned long addr = range->start, i = 0;
	412	+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
	413	+ struct hmm_range *range = hmm_vma_walk->range;
	414	+ unsigned long addr = start;
	415	+ pud_t pud;
	416	+ int ret = 0;
	417	+ spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma);
645	418
646		- for (; addr < range->end; addr += PAGE_SIZE, i++)
647		- range->pfns[i] = range->values[HMM_PFN_SPECIAL];
	419	+ if (!ptl)
	420	+ return 0;
	421	+
	422	+ /* Normally we don't want to split the huge page */
	423	+ walk->action = ACTION_CONTINUE;
	424	+
	425	+ pud = READ_ONCE(*pudp);
	426	+ if (pud_none(pud)) {
	427	+ spin_unlock(ptl);
	428	+ return hmm_vma_walk_hole(start, end, -1, walk);
	429	+ }
	430	+
	431	+ if (pud_huge(pud) && pud_devmap(pud)) {
	432	+ unsigned long i, npages, pfn;
	433	+ unsigned int required_fault;
	434	+ unsigned long *hmm_pfns;
	435	+ unsigned long cpu_flags;
	436	+
	437	+ if (!pud_present(pud)) {
	438	+ spin_unlock(ptl);
	439	+ return hmm_vma_walk_hole(start, end, -1, walk);
	440	+ }
	441	+
	442	+ i = (addr - range->start) >> PAGE_SHIFT;
	443	+ npages = (end - addr) >> PAGE_SHIFT;
	444	+ hmm_pfns = &range->hmm_pfns[i];
	445	+
	446	+ cpu_flags = pud_to_hmm_pfn_flags(range, pud);
	447	+ required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns,
	448	+ npages, cpu_flags);
	449	+ if (required_fault) {
	450	+ spin_unlock(ptl);
	451	+ return hmm_vma_fault(addr, end, required_fault, walk);
	452	+ }
	453	+
	454	+ pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	455	+ for (i = 0; i < npages; ++i, ++pfn)
	456	+ hmm_pfns[i] = pfn \| cpu_flags;
	457	+ goto out_unlock;
	458	+ }
	459	+
	460	+ /* Ask for the PUD to be split */
	461	+ walk->action = ACTION_SUBTREE;
	462	+
	463	+out_unlock:
	464	+ spin_unlock(ptl);
	465	+ return ret;
648	466	}
	467	+#else
	468	+#define hmm_vma_walk_pud NULL
	469	+#endif
649	470
650		-/*
651		- * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses
652		- * @range: range being snapshotted
653		- * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
654		- * vma permission, 0 success
655		- *
656		- * This snapshots the CPU page table for a range of virtual addresses. Snapshot
657		- * validity is tracked by range struct. See hmm_vma_range_done() for further
658		- * information.
659		- *
660		- * The range struct is initialized here. It tracks the CPU page table, but only
661		- * if the function returns success (0), in which case the caller must then call
662		- * hmm_vma_range_done() to stop CPU page table update tracking on this range.
663		- *
664		- * NOT CALLING hmm_vma_range_done() IF FUNCTION RETURNS 0 WILL LEAD TO SERIOUS
665		- * MEMORY CORRUPTION ! YOU HAVE BEEN WARNED !
666		- */
667		-int hmm_vma_get_pfns(struct hmm_range *range)
	471	+#ifdef CONFIG_HUGETLB_PAGE
	472	+static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
	473	+ unsigned long start, unsigned long end,
	474	+ struct mm_walk *walk)
668	475	{
669		- struct vm_area_struct *vma = range->vma;
670		- struct hmm_vma_walk hmm_vma_walk;
671		- struct mm_walk mm_walk;
672		- struct hmm *hmm;
	476	+ unsigned long addr = start, i, pfn;
	477	+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
	478	+ struct hmm_range *range = hmm_vma_walk->range;
	479	+ struct vm_area_struct *vma = walk->vma;
	480	+ unsigned int required_fault;
	481	+ unsigned long pfn_req_flags;
	482	+ unsigned long cpu_flags;
	483	+ spinlock_t *ptl;
	484	+ pte_t entry;
673	485
674		- /* Sanity check, this really should not happen ! */
675		- if (range->start < vma->vm_start || range->start >= vma->vm_end)
676		- return -EINVAL;
677		- if (range->end < vma->vm_start || range->end > vma->vm_end)
678		- return -EINVAL;
	486	+ ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
	487	+ entry = huge_ptep_get(pte);
679	488
680		- hmm = hmm_register(vma->vm_mm);
681		- if (!hmm)
682		- return -ENOMEM;
683		- /* Caller must have registered a mirror, via hmm_mirror_register() ! */
684		- if (!hmm->mmu_notifier.ops)
685		- return -EINVAL;
686		-
687		- /* FIXME support hugetlb fs */
688		- if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
689		- vma_is_dax(vma)) {
690		- hmm_pfns_special(range);
691		- return -EINVAL;
	489	+ i = (start - range->start) >> PAGE_SHIFT;
	490	+ pfn_req_flags = range->hmm_pfns[i];
	491	+ cpu_flags = pte_to_hmm_pfn_flags(range, entry) |
	492	+ hmm_pfn_flags_order(huge_page_order(hstate_vma(vma)));
	493	+ required_fault =
	494	+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
	495	+ if (required_fault) {
	496	+ spin_unlock(ptl);
	497	+ return hmm_vma_fault(addr, end, required_fault, walk);
692	498	}
693	499
694		- if (!(vma->vm_flags & VM_READ)) {
695		- /*
696		- * If vma do not allow read access, then assume that it does
697		- * not allow write access, either. Architecture that allow
698		- * write without read access are not supported by HMM, because
699		- * operations such has atomic access would not work.
700		- */
701		- hmm_pfns_clear(range, range->pfns, range->start, range->end);
702		- return -EPERM;
703		- }
	500	+ pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
	501	+ for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
	502	+ range->hmm_pfns[i] = pfn | cpu_flags;
704	503
705		- /* Initialize range to track CPU page table update */
706		- spin_lock(&hmm->lock);
707		- range->valid = true;
708		- list_add_rcu(&range->list, &hmm->ranges);
709		- spin_unlock(&hmm->lock);
710		-
711		- hmm_vma_walk.fault = false;
712		- hmm_vma_walk.range = range;
713		- mm_walk.private = &hmm_vma_walk;
714		-
715		- mm_walk.vma = vma;
716		- mm_walk.mm = vma->vm_mm;
717		- mm_walk.pte_entry = NULL;
718		- mm_walk.test_walk = NULL;
719		- mm_walk.hugetlb_entry = NULL;
720		- mm_walk.pmd_entry = hmm_vma_walk_pmd;
721		- mm_walk.pte_hole = hmm_vma_walk_hole;
722		-
723		- walk_page_range(range->start, range->end, &mm_walk);
	504	+ spin_unlock(ptl);
724	505	return 0;
725	506	}
726		-EXPORT_SYMBOL(hmm_vma_get_pfns);
	507	+#else
	508	+#define hmm_vma_walk_hugetlb_entry NULL
	509	+#endif /* CONFIG_HUGETLB_PAGE */
727	510
728		-/*
729		- * hmm_vma_range_done() - stop tracking change to CPU page table over a range
730		- * @range: range being tracked
731		- * Returns: false if range data has been invalidated, true otherwise
732		- *
733		- * Range struct is used to track updates to the CPU page table after a call to
734		- * either hmm_vma_get_pfns() or hmm_vma_fault(). Once the device driver is done
735		- * using the data, or wants to lock updates to the data it got from those
736		- * functions, it must call the hmm_vma_range_done() function, which will then
737		- * stop tracking CPU page table updates.
738		- *
739		- * Note that device driver must still implement general CPU page table update
740		- * tracking either by using hmm_mirror (see hmm_mirror_register()) or by using
741		- * the mmu_notifier API directly.
742		- *
743		- * CPU page table update tracking done through hmm_range is only temporary and
744		- * to be used while trying to duplicate CPU page table contents for a range of
745		- * virtual addresses.
746		- *
747		- * There are two ways to use this :
748		- * again:
749		- * hmm_vma_get_pfns(range); or hmm_vma_fault(...);
750		- * trans = device_build_page_table_update_transaction(pfns);
751		- * device_page_table_lock();
752		- * if (!hmm_vma_range_done(range)) {
753		- * device_page_table_unlock();
754		- * goto again;
755		- * }
756		- * device_commit_transaction(trans);
757		- * device_page_table_unlock();
758		- *
759		- * Or:
760		- * hmm_vma_get_pfns(range); or hmm_vma_fault(...);
761		- * device_page_table_lock();
762		- * hmm_vma_range_done(range);
763		- * device_update_page_table(range->pfns);
764		- * device_page_table_unlock();
765		- */
766		-bool hmm_vma_range_done(struct hmm_range *range)
	511	+static int hmm_vma_walk_test(unsigned long start, unsigned long end,
	512	+ struct mm_walk *walk)
767	513	{
768		- unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
769		- struct hmm *hmm;
	514	+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
	515	+ struct hmm_range *range = hmm_vma_walk->range;
	516	+ struct vm_area_struct *vma = walk->vma;
770	517
771		- if (range->end <= range->start) {
772		- BUG();
773		- return false;
774		- }
	518	+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
	519	+ vma->vm_flags & VM_READ)
	520	+ return 0;
775	521
776		- hmm = hmm_register(range->vma->vm_mm);
777		- if (!hmm) {
778		- memset(range->pfns, 0, sizeof(*range->pfns) * npages);
779		- return false;
780		- }
	522	+ /*
	523	+ * vma ranges that don't have struct page backing them or map I/O
	524	+ * devices directly cannot be handled by hmm_range_fault().
	525	+ *
	526	+ * If the vma does not allow read access, then assume that it does not
	527	+ * allow write access either. HMM does not support architectures that
	528	+ * allow write without read.
	529	+ *
	530	+ * If a fault is requested for an unsupported range then it is a hard
	531	+ * failure.
	532	+ */
	533	+ if (hmm_range_need_fault(hmm_vma_walk,
	534	+ range->hmm_pfns +
	535	+ ((start - range->start) >> PAGE_SHIFT),
	536	+ (end - start) >> PAGE_SHIFT, 0))
	537	+ return -EFAULT;
781	538
782		- spin_lock(&hmm->lock);
783		- list_del_rcu(&range->list);
784		- spin_unlock(&hmm->lock);
	539	+ hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
785	540
786		- return range->valid;
	541	+ /* Skip this vma and continue processing the next vma. */
	542	+ return 1;
787	543	}
788		-EXPORT_SYMBOL(hmm_vma_range_done);
789	544
790		-/*
791		- * hmm_vma_fault() - try to fault some address in a virtual address range
792		- * @range: range being faulted
793		- * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
794		- * Returns: 0 success, error otherwise (-EAGAIN means mmap_sem have been drop)
	545	+static const struct mm_walk_ops hmm_walk_ops = {
	546	+ .pud_entry = hmm_vma_walk_pud,
	547	+ .pmd_entry = hmm_vma_walk_pmd,
	548	+ .pte_hole = hmm_vma_walk_hole,
	549	+ .hugetlb_entry = hmm_vma_walk_hugetlb_entry,
	550	+ .test_walk = hmm_vma_walk_test,
	551	+};
	552	+
	553	+/**
	554	+ * hmm_range_fault - try to fault some address in a virtual address range
	555	+ * @range: argument structure
795	556	*
796		- * This is similar to a regular CPU page fault except that it will not trigger
797		- * any memory migration if the memory being faulted is not accessible by CPUs.
	557	+ * Returns 0 on success or one of the following error codes:
798	558	*
799		- * On error, for one virtual address in the range, the function will mark the
800		- * corresponding HMM pfn entry with an error flag.
	559	+ * -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma
	560	+ * (e.g., device file vma).
	561	+ * -ENOMEM: Out of memory.
	562	+ * -EPERM: Invalid permission (e.g., asking for write and range is read
	563	+ * only).
	564	+ * -EBUSY: The range has been invalidated and the caller needs to wait for
	565	+ * the invalidation to finish.
	566	+ * -EFAULT: A page was requested to be valid and could not be made valid
	567	+ * ie it has no backing VMA or it is illegal to access
801	568	*
802		- * Expected use pattern:
803		- * retry:
804		- * down_read(&mm->mmap_sem);
805		- * // Find vma and address device wants to fault, initialize hmm_pfn_t
806		- * // array accordingly
807		- * ret = hmm_vma_fault(range, write, block);
808		- * switch (ret) {
809		- * case -EAGAIN:
810		- * hmm_vma_range_done(range);
811		- * // You might want to rate limit or yield to play nicely, you may
812		- * // also commit any valid pfn in the array assuming that you are
813		- * // getting true from hmm_vma_range_monitor_end()
814		- * goto retry;
815		- * case 0:
816		- * break;
817		- * case -ENOMEM:
818		- * case -EINVAL:
819		- * case -EPERM:
820		- * default:
821		- * // Handle error !
822		- * up_read(&mm->mmap_sem)
823		- * return;
824		- * }
825		- * // Take device driver lock that serialize device page table update
826		- * driver_lock_device_page_table_update();
827		- * hmm_vma_range_done(range);
828		- * // Commit pfns we got from hmm_vma_fault()
829		- * driver_unlock_device_page_table_update();
830		- * up_read(&mm->mmap_sem)
831		- *
832		- * YOU MUST CALL hmm_vma_range_done() AFTER THIS FUNCTION RETURN SUCCESS (0)
833		- * BEFORE FREEING THE range struct OR YOU WILL HAVE SERIOUS MEMORY CORRUPTION !
834		- *
835		- * YOU HAVE BEEN WARNED !
	569	+ * This is similar to get_user_pages(), except that it can read the page tables
	570	+ * without mutating them (ie causing faults).
836	571	*/
837		-int hmm_vma_fault(struct hmm_range *range, bool block)
	572	+int hmm_range_fault(struct hmm_range *range)
838	573	{
839		- struct vm_area_struct *vma = range->vma;
840		- unsigned long start = range->start;
841		- struct hmm_vma_walk hmm_vma_walk;
842		- struct mm_walk mm_walk;
843		- struct hmm *hmm;
	574	+ struct hmm_vma_walk hmm_vma_walk = {
	575	+ .range = range,
	576	+ .last = range->start,
	577	+ };
	578	+ struct mm_struct *mm = range->notifier->mm;
844	579	int ret;
845	580
846		- /* Sanity check, this really should not happen ! */
847		- if (range->start < vma->vm_start || range->start >= vma->vm_end)
848		- return -EINVAL;
849		- if (range->end < vma->vm_start || range->end > vma->vm_end)
850		- return -EINVAL;
851		-
852		- hmm = hmm_register(vma->vm_mm);
853		- if (!hmm) {
854		- hmm_pfns_clear(range, range->pfns, range->start, range->end);
855		- return -ENOMEM;
856		- }
857		- /* Caller must have registered a mirror using hmm_mirror_register() */
858		- if (!hmm->mmu_notifier.ops)
859		- return -EINVAL;
860		-
861		- /* FIXME support hugetlb fs */
862		- if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
863		- vma_is_dax(vma)) {
864		- hmm_pfns_special(range);
865		- return -EINVAL;
866		- }
867		-
868		- if (!(vma->vm_flags & VM_READ)) {
869		- /*
870		- * If vma do not allow read access, then assume that it does
871		- * not allow write access, either. Architecture that allow
872		- * write without read access are not supported by HMM, because
873		- * operations such has atomic access would not work.
874		- */
875		- hmm_pfns_clear(range, range->pfns, range->start, range->end);
876		- return -EPERM;
877		- }
878		-
879		- /* Initialize range to track CPU page table update */
880		- spin_lock(&hmm->lock);
881		- range->valid = true;
882		- list_add_rcu(&range->list, &hmm->ranges);
883		- spin_unlock(&hmm->lock);
884		-
885		- hmm_vma_walk.fault = true;
886		- hmm_vma_walk.block = block;
887		- hmm_vma_walk.range = range;
888		- mm_walk.private = &hmm_vma_walk;
889		- hmm_vma_walk.last = range->start;
890		-
891		- mm_walk.vma = vma;
892		- mm_walk.mm = vma->vm_mm;
893		- mm_walk.pte_entry = NULL;
894		- mm_walk.test_walk = NULL;
895		- mm_walk.hugetlb_entry = NULL;
896		- mm_walk.pmd_entry = hmm_vma_walk_pmd;
897		- mm_walk.pte_hole = hmm_vma_walk_hole;
	581	+ mmap_assert_locked(mm);
898	582
899	583	do {
900		- ret = walk_page_range(start, range->end, &mm_walk);
901		- start = hmm_vma_walk.last;
902		- } while (ret == -EAGAIN);
903		-
904		- if (ret) {
905		- unsigned long i;
906		-
907		- i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
908		- hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
909		- range->end);
910		- hmm_vma_range_done(range);
911		- }
	584	+ /* If range is no longer valid force retry. */
	585	+ if (mmu_interval_check_retry(range->notifier,
	586	+ range->notifier_seq))
	587	+ return -EBUSY;
	588	+ ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
	589	+ &hmm_walk_ops, &hmm_vma_walk);
	590	+ /*
	591	+ * When -EBUSY is returned the loop restarts with
	592	+ * hmm_vma_walk.last set to an address that has not been stored
	593	+ * in pfns. All entries < last in the pfn array are set to their
	594	+ * output, and all >= are still at their input values.
	595	+ */
	596	+ } while (ret == -EBUSY);
912	597	return ret;
913	598	}
914		-EXPORT_SYMBOL(hmm_vma_fault);
915		-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
916		-
917		-
918		-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
919		-struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
920		- unsigned long addr)
921		-{
922		- struct page *page;
923		-
924		- page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
925		- if (!page)
926		- return NULL;
927		- lock_page(page);
928		- return page;
929		-}
930		-EXPORT_SYMBOL(hmm_vma_alloc_locked_page);
931		-
932		-
933		-static void hmm_devmem_ref_release(struct percpu_ref *ref)
934		-{
935		- struct hmm_devmem *devmem;
936		-
937		- devmem = container_of(ref, struct hmm_devmem, ref);
938		- complete(&devmem->completion);
939		-}
940		-
941		-static void hmm_devmem_ref_exit(void *data)
942		-{
943		- struct percpu_ref *ref = data;
944		- struct hmm_devmem *devmem;
945		-
946		- devmem = container_of(ref, struct hmm_devmem, ref);
947		- percpu_ref_exit(ref);
948		-}
949		-
950		-static void hmm_devmem_ref_kill(void *data)
951		-{
952		- struct percpu_ref *ref = data;
953		- struct hmm_devmem *devmem;
954		-
955		- devmem = container_of(ref, struct hmm_devmem, ref);
956		- percpu_ref_kill(ref);
957		- wait_for_completion(&devmem->completion);
958		-}
959		-
960		-static int hmm_devmem_fault(struct vm_area_struct *vma,
961		- unsigned long addr,
962		- const struct page *page,
963		- unsigned int flags,
964		- pmd_t *pmdp)
965		-{
966		- struct hmm_devmem *devmem = page->pgmap->data;
967		-
968		- return devmem->ops->fault(devmem, vma, addr, page, flags, pmdp);
969		-}
970		-
971		-static void hmm_devmem_free(struct page *page, void *data)
972		-{
973		- struct hmm_devmem *devmem = data;
974		-
975		- page->mapping = NULL;
976		-
977		- devmem->ops->free(devmem, page);
978		-}
979		-
980		-static DEFINE_MUTEX(hmm_devmem_lock);
981		-static RADIX_TREE(hmm_devmem_radix, GFP_KERNEL);
982		-
983		-static void hmm_devmem_radix_release(struct resource *resource)
984		-{
985		- resource_size_t key;
986		-
987		- mutex_lock(&hmm_devmem_lock);
988		- for (key = resource->start;
989		- key <= resource->end;
990		- key += PA_SECTION_SIZE)
991		- radix_tree_delete(&hmm_devmem_radix, key >> PA_SECTION_SHIFT);
992		- mutex_unlock(&hmm_devmem_lock);
993		-}
994		-
995		-static void hmm_devmem_release(void *data)
996		-{
997		- struct hmm_devmem *devmem = data;
998		- struct resource *resource = devmem->resource;
999		- unsigned long start_pfn, npages;
1000		- struct page *page;
1001		- int nid;
1002		-
1003		- /* pages are dead and unused, undo the arch mapping */
1004		- start_pfn = (resource->start & ~(PA_SECTION_SIZE - 1)) >> PAGE_SHIFT;
1005		- npages = ALIGN(resource_size(resource), PA_SECTION_SIZE) >> PAGE_SHIFT;
1006		-
1007		- page = pfn_to_page(start_pfn);
1008		- nid = page_to_nid(page);
1009		-
1010		- mem_hotplug_begin();
1011		- if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY)
1012		- __remove_pages(start_pfn, npages, NULL);
1013		- else
1014		- arch_remove_memory(nid, start_pfn << PAGE_SHIFT,
1015		- npages << PAGE_SHIFT, NULL);
1016		- mem_hotplug_done();
1017		-
1018		- hmm_devmem_radix_release(resource);
1019		-}
1020		-
1021		-static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
1022		-{
1023		- resource_size_t key, align_start, align_size, align_end;
1024		- struct device *device = devmem->device;
1025		- int ret, nid, is_ram;
1026		- unsigned long pfn;
1027		-
1028		- align_start = devmem->resource->start & ~(PA_SECTION_SIZE - 1);
1029		- align_size = ALIGN(devmem->resource->start +
1030		- resource_size(devmem->resource),
1031		- PA_SECTION_SIZE) - align_start;
1032		-
1033		- is_ram = region_intersects(align_start, align_size,
1034		- IORESOURCE_SYSTEM_RAM,
1035		- IORES_DESC_NONE);
1036		- if (is_ram == REGION_MIXED) {
1037		- WARN_ONCE(1, "%s attempted on mixed region %pr\n",
1038		- __func__, devmem->resource);
1039		- return -ENXIO;
1040		- }
1041		- if (is_ram == REGION_INTERSECTS)
1042		- return -ENXIO;
1043		-
1044		- if (devmem->resource->desc == IORES_DESC_DEVICE_PUBLIC_MEMORY)
1045		- devmem->pagemap.type = MEMORY_DEVICE_PUBLIC;
1046		- else
1047		- devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
1048		-
1049		- devmem->pagemap.res = *devmem->resource;
1050		- devmem->pagemap.page_fault = hmm_devmem_fault;
1051		- devmem->pagemap.page_free = hmm_devmem_free;
1052		- devmem->pagemap.dev = devmem->device;
1053		- devmem->pagemap.ref = &devmem->ref;
1054		- devmem->pagemap.data = devmem;
1055		-
1056		- mutex_lock(&hmm_devmem_lock);
1057		- align_end = align_start + align_size - 1;
1058		- for (key = align_start; key <= align_end; key += PA_SECTION_SIZE) {
1059		- struct hmm_devmem *dup;
1060		-
1061		- dup = radix_tree_lookup(&hmm_devmem_radix,
1062		- key >> PA_SECTION_SHIFT);
1063		- if (dup) {
1064		- dev_err(device, "%s: collides with mapping for %s\n",
1065		- __func__, dev_name(dup->device));
1066		- mutex_unlock(&hmm_devmem_lock);
1067		- ret = -EBUSY;
1068		- goto error;
1069		- }
1070		- ret = radix_tree_insert(&hmm_devmem_radix,
1071		- key >> PA_SECTION_SHIFT,
1072		- devmem);
1073		- if (ret) {
1074		- dev_err(device, "%s: failed: %d\n", __func__, ret);
1075		- mutex_unlock(&hmm_devmem_lock);
1076		- goto error_radix;
1077		- }
1078		- }
1079		- mutex_unlock(&hmm_devmem_lock);
1080		-
1081		- nid = dev_to_node(device);
1082		- if (nid < 0)
1083		- nid = numa_mem_id();
1084		-
1085		- mem_hotplug_begin();
1086		- /*
1087		- * For device private memory we call add_pages() as we only need to
1088		- * allocate and initialize struct page for the device memory. More-
1089		- * over the device memory is un-accessible thus we do not want to
1090		- * create a linear mapping for the memory like arch_add_memory()
1091		- * would do.
1092		- *
1093		- * For device public memory, which is accesible by the CPU, we do
1094		- * want the linear mapping and thus use arch_add_memory().
1095		- */
1096		- if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC)
1097		- ret = arch_add_memory(nid, align_start, align_size, NULL,
1098		- false);
1099		- else
1100		- ret = add_pages(nid, align_start >> PAGE_SHIFT,
1101		- align_size >> PAGE_SHIFT, NULL, false);
1102		- if (ret) {
1103		- mem_hotplug_done();
1104		- goto error_add_memory;
1105		- }
1106		- move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
1107		- align_start >> PAGE_SHIFT,
1108		- align_size >> PAGE_SHIFT, NULL);
1109		- mem_hotplug_done();
1110		-
1111		- for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {
1112		- struct page *page = pfn_to_page(pfn);
1113		-
1114		- page->pgmap = &devmem->pagemap;
1115		- }
1116		- return 0;
1117		-
1118		-error_add_memory:
1119		- untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
1120		-error_radix:
1121		- hmm_devmem_radix_release(devmem->resource);
1122		-error:
1123		- return ret;
1124		-}
1125		-
1126		-/*
1127		- * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory
1128		- *
1129		- * @ops: memory event device driver callback (see struct hmm_devmem_ops)
1130		- * @device: device struct to bind the resource too
1131		- * @size: size in bytes of the device memory to add
1132		- * Returns: pointer to new hmm_devmem struct ERR_PTR otherwise
1133		- *
1134		- * This function first finds an empty range of physical address big enough to
1135		- * contain the new resource, and then hotplugs it as ZONE_DEVICE memory, which
1136		- * in turn allocates struct pages. It does not do anything beyond that; all
1137		- * events affecting the memory will go through the various callbacks provided
1138		- * by hmm_devmem_ops struct.
1139		- *
1140		- * Device driver should call this function during device initialization and
1141		- * is then responsible of memory management. HMM only provides helpers.
1142		- */
1143		-struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
1144		- struct device *device,
1145		- unsigned long size)
1146		-{
1147		- struct hmm_devmem *devmem;
1148		- resource_size_t addr;
1149		- int ret;
1150		-
1151		- dev_pagemap_get_ops();
1152		-
1153		- devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
1154		- if (!devmem)
1155		- return ERR_PTR(-ENOMEM);
1156		-
1157		- init_completion(&devmem->completion);
1158		- devmem->pfn_first = -1UL;
1159		- devmem->pfn_last = -1UL;
1160		- devmem->resource = NULL;
1161		- devmem->device = device;
1162		- devmem->ops = ops;
1163		-
1164		- ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
1165		- 0, GFP_KERNEL);
1166		- if (ret)
1167		- return ERR_PTR(ret);
1168		-
1169		- ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, &devmem->ref);
1170		- if (ret)
1171		- return ERR_PTR(ret);
1172		-
1173		- size = ALIGN(size, PA_SECTION_SIZE);
1174		- addr = min((unsigned long)iomem_resource.end,
1175		- (1UL << MAX_PHYSMEM_BITS) - 1);
1176		- addr = addr - size + 1UL;
1177		-
1178		- /*
1179		- * FIXME add a new helper to quickly walk resource tree and find free
1180		- * range
1181		- *
1182		- * FIXME what about ioport_resource resource ?
1183		- */
1184		- for (; addr > size && addr >= iomem_resource.start; addr -= size) {
1185		- ret = region_intersects(addr, size, 0, IORES_DESC_NONE);
1186		- if (ret != REGION_DISJOINT)
1187		- continue;
1188		-
1189		- devmem->resource = devm_request_mem_region(device, addr, size,
1190		- dev_name(device));
1191		- if (!devmem->resource)
1192		- return ERR_PTR(-ENOMEM);
1193		- break;
1194		- }
1195		- if (!devmem->resource)
1196		- return ERR_PTR(-ERANGE);
1197		-
1198		- devmem->resource->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
1199		- devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
1200		- devmem->pfn_last = devmem->pfn_first +
1201		- (resource_size(devmem->resource) >> PAGE_SHIFT);
1202		-
1203		- ret = hmm_devmem_pages_create(devmem);
1204		- if (ret)
1205		- return ERR_PTR(ret);
1206		-
1207		- ret = devm_add_action_or_reset(device, hmm_devmem_release, devmem);
1208		- if (ret)
1209		- return ERR_PTR(ret);
1210		-
1211		- return devmem;
1212		-}
1213		-EXPORT_SYMBOL_GPL(hmm_devmem_add);
1214		-
1215		-struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
1216		- struct device *device,
1217		- struct resource *res)
1218		-{
1219		- struct hmm_devmem *devmem;
1220		- int ret;
1221		-
1222		- if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
1223		- return ERR_PTR(-EINVAL);
1224		-
1225		- dev_pagemap_get_ops();
1226		-
1227		- devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
1228		- if (!devmem)
1229		- return ERR_PTR(-ENOMEM);
1230		-
1231		- init_completion(&devmem->completion);
1232		- devmem->pfn_first = -1UL;
1233		- devmem->pfn_last = -1UL;
1234		- devmem->resource = res;
1235		- devmem->device = device;
1236		- devmem->ops = ops;
1237		-
1238		- ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
1239		- 0, GFP_KERNEL);
1240		- if (ret)
1241		- return ERR_PTR(ret);
1242		-
1243		- ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit,
1244		- &devmem->ref);
1245		- if (ret)
1246		- return ERR_PTR(ret);
1247		-
1248		- devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
1249		- devmem->pfn_last = devmem->pfn_first +
1250		- (resource_size(devmem->resource) >> PAGE_SHIFT);
1251		-
1252		- ret = hmm_devmem_pages_create(devmem);
1253		- if (ret)
1254		- return ERR_PTR(ret);
1255		-
1256		- ret = devm_add_action_or_reset(device, hmm_devmem_release, devmem);
1257		- if (ret)
1258		- return ERR_PTR(ret);
1259		-
1260		- ret = devm_add_action_or_reset(device, hmm_devmem_ref_kill,
1261		- &devmem->ref);
1262		- if (ret)
1263		- return ERR_PTR(ret);
1264		-
1265		- return devmem;
1266		-}
1267		-EXPORT_SYMBOL_GPL(hmm_devmem_add_resource);
1268		-
1269		-/*
1270		- * A device driver that wants to handle multiple devices memory through a
1271		- * single fake device can use hmm_device to do so. This is purely a helper
1272		- * and it is not needed to make use of any HMM functionality.
1273		- */
1274		-#define HMM_DEVICE_MAX 256
1275		-
1276		-static DECLARE_BITMAP(hmm_device_mask, HMM_DEVICE_MAX);
1277		-static DEFINE_SPINLOCK(hmm_device_lock);
1278		-static struct class *hmm_device_class;
1279		-static dev_t hmm_device_devt;
1280		-
1281		-static void hmm_device_release(struct device *device)
1282		-{
1283		- struct hmm_device *hmm_device;
1284		-
1285		- hmm_device = container_of(device, struct hmm_device, device);
1286		- spin_lock(&hmm_device_lock);
1287		- clear_bit(hmm_device->minor, hmm_device_mask);
1288		- spin_unlock(&hmm_device_lock);
1289		-
1290		- kfree(hmm_device);
1291		-}
1292		-
1293		-struct hmm_device *hmm_device_new(void *drvdata)
1294		-{
1295		- struct hmm_device *hmm_device;
1296		-
1297		- hmm_device = kzalloc(sizeof(*hmm_device), GFP_KERNEL);
1298		- if (!hmm_device)
1299		- return ERR_PTR(-ENOMEM);
1300		-
1301		- spin_lock(&hmm_device_lock);
1302		- hmm_device->minor = find_first_zero_bit(hmm_device_mask, HMM_DEVICE_MAX);
1303		- if (hmm_device->minor >= HMM_DEVICE_MAX) {
1304		- spin_unlock(&hmm_device_lock);
1305		- kfree(hmm_device);
1306		- return ERR_PTR(-EBUSY);
1307		- }
1308		- set_bit(hmm_device->minor, hmm_device_mask);
1309		- spin_unlock(&hmm_device_lock);
1310		-
1311		- dev_set_name(&hmm_device->device, "hmm_device%d", hmm_device->minor);
1312		- hmm_device->device.devt = MKDEV(MAJOR(hmm_device_devt),
1313		- hmm_device->minor);
1314		- hmm_device->device.release = hmm_device_release;
1315		- dev_set_drvdata(&hmm_device->device, drvdata);
1316		- hmm_device->device.class = hmm_device_class;
1317		- device_initialize(&hmm_device->device);
1318		-
1319		- return hmm_device;
1320		-}
1321		-EXPORT_SYMBOL(hmm_device_new);
1322		-
1323		-void hmm_device_put(struct hmm_device *hmm_device)
1324		-{
1325		- put_device(&hmm_device->device);
1326		-}
1327		-EXPORT_SYMBOL(hmm_device_put);
1328		-
1329		-static int __init hmm_init(void)
1330		-{
1331		- int ret;
1332		-
1333		- ret = alloc_chrdev_region(&hmm_device_devt, 0,
1334		- HMM_DEVICE_MAX,
1335		- "hmm_device");
1336		- if (ret)
1337		- return ret;
1338		-
1339		- hmm_device_class = class_create(THIS_MODULE, "hmm_device");
1340		- if (IS_ERR(hmm_device_class)) {
1341		- unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX);
1342		- return PTR_ERR(hmm_device_class);
1343		- }
1344		- return 0;
1345		-}
1346		-
1347		-device_initcall(hmm_init);
1348		-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
	599	+EXPORT_SYMBOL(hmm_range_fault);