@@ -1,562 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright 2013 Red Hat Inc.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
  *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Authors: Jérôme Glisse <jglisse@redhat.com>
- */
-/*
- * Heterogeneous Memory Management (HMM)
- *
- * See Documentation/vm/hmm.rst for reasons and overview of what HMM is and it
- * is for. Here we focus on the HMM API description, with some explanation of
- * the underlying implementation.
- *
- * Short description: HMM provides a set of helpers to share a virtual address
- * space between CPU and a device, so that the device can access any valid
- * address of the process (while still obeying memory protection). HMM also
- * provides helpers to migrate process memory to device memory, and back. Each
- * set of functionality (address space mirroring, and migration to and from
- * device memory) can be used independently of the other.
- *
- *
- * HMM address space mirroring API:
- *
- * Use HMM address space mirroring if you want to mirror range of the CPU page
- * table of a process into a device page table. Here, "mirror" means "keep
- * synchronized". Prerequisites: the device must provide the ability to write-
- * protect its page tables (at PAGE_SIZE granularity), and must be able to
- * recover from the resulting potential page faults.
- *
- * HMM guarantees that at any point in time, a given virtual address points to
- * either the same memory in both CPU and device page tables (that is: CPU and
- * device page tables each point to the same pages), or that one page table (CPU
- * or device) points to no entry, while the other still points to the old page
- * for the address. The latter case happens when the CPU page table update
- * happens first, and then the update is mirrored over to the device page table.
- * This does not cause any issue, because the CPU page table cannot start
- * pointing to a new page until the device page table is invalidated.
- *
- * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any
- * updates to each device driver that has registered a mirror. It also provides
- * some API calls to help with taking a snapshot of the CPU page table, and to
- * synchronize with any updates that might happen concurrently.
- *
- *
- * HMM migration to and from device memory:
- *
- * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with
- * a new MEMORY_DEVICE_PRIVATE type. This provides a struct page for each page
- * of the device memory, and allows the device driver to manage its memory
- * using those struct pages. Having struct pages for device memory makes
- * migration easier. Because that memory is not addressable by the CPU it must
- * never be pinned to the device; in other words, any CPU page fault can always
- * cause the device memory to be migrated (copied/moved) back to regular memory.
- *
- * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that
- * allows use of a device DMA engine to perform the copy operation between
- * regular system memory and device memory.
+ * See Documentation/vm/hmm.rst for reasons and overview of what HMM is.
  */
 #ifndef LINUX_HMM_H
 #define LINUX_HMM_H
 
 #include <linux/kconfig.h>
-
-#if IS_ENABLED(CONFIG_HMM)
+#include <linux/pgtable.h>
 
 #include <linux/device.h>
 #include <linux/migrate.h>
 #include <linux/memremap.h>
 #include <linux/completion.h>
-
-struct hmm;
+#include <linux/mmu_notifier.h>
 
 /*
- * hmm_pfn_flag_e - HMM flag enums
+ * On output:
+ * 0             - The page is faultable and a future call with
+ *                 HMM_PFN_REQ_FAULT could succeed.
+ * HMM_PFN_VALID - the pfn field points to a valid PFN. This PFN is at
+ *                 least readable. If dev_private_owner is !NULL then this could
+ *                 point at a DEVICE_PRIVATE page.
+ * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
+ * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
+ *                 fail. ie poisoned memory, special pages, no vma, etc
  *
- * Flags:
- * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
- * HMM_PFN_WRITE: CPU page table has write permission set
- * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
- *
- * The driver provide a flags array, if driver valid bit for an entry is bit
- * 3 ie (entry & (1 << 3)) is true if entry is valid then driver must provide
- * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
- * Same logic apply to all flags. This is same idea as vm_page_prot in vma
- * except that this is per device driver rather than per architecture.
+ * On input:
+ * 0                 - Return the current state of the page, do not fault it.
+ * HMM_PFN_REQ_FAULT - The output must have HMM_PFN_VALID or hmm_range_fault()
+ *                     will fail
+ * HMM_PFN_REQ_WRITE - The output must have HMM_PFN_WRITE or hmm_range_fault()
+ *                     will fail. Must be combined with HMM_PFN_REQ_FAULT.
  */
-enum hmm_pfn_flag_e {
-        HMM_PFN_VALID = 0,
-        HMM_PFN_WRITE,
-        HMM_PFN_DEVICE_PRIVATE,
-        HMM_PFN_FLAG_MAX
+enum hmm_pfn_flags {
+        /* Output fields and flags */
+        HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
+        HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
+        HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
+        HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8),
+
+        /* Input flags */
+        HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
+        HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
+
+        HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT,
 };
 
 /*
- * hmm_pfn_value_e - HMM pfn special value
+ * hmm_pfn_to_page() - return struct page pointed to by a device entry
  *
- * Flags:
- * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
- * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
- * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
- * be mirrored by a device, because the entry will never have HMM_PFN_VALID
- * set and the pfn value is undefined.
- *
- * Driver provide entry value for none entry, error entry and special entry,
- * driver can alias (ie use same value for error and special for instance). It
- * should not alias none and error or special.
- *
- * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
- * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
- * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table
- * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
+ * This must be called under the caller 'user_lock' after a successful
+ * mmu_interval_read_begin(). The caller must have tested for HMM_PFN_VALID
+ * already.
  */
-enum hmm_pfn_value_e {
-        HMM_PFN_ERROR,
-        HMM_PFN_NONE,
-        HMM_PFN_SPECIAL,
-        HMM_PFN_VALUE_MAX
-};
+static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
+{
+        return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
+}
+
+/*
+ * hmm_pfn_to_map_order() - return the CPU mapping size order
+ *
+ * This is optionally useful to optimize processing of the pfn result
+ * array. It indicates that the page starts at the order aligned VA and is
+ * 1<<order bytes long. Every pfn within an high order page will have the
+ * same pfn flags, both access protections and the map_order. The caller must
+ * be careful with edge cases as the start and end VA of the given page may
+ * extend past the range used with hmm_range_fault().
+ *
+ * This must be called under the caller 'user_lock' after a successful
+ * mmu_interval_read_begin(). The caller must have tested for HMM_PFN_VALID
+ * already.
+ */
+static inline unsigned int hmm_pfn_to_map_order(unsigned long hmm_pfn)
+{
+        return (hmm_pfn >> HMM_PFN_ORDER_SHIFT) & 0x1F;
+}
 
 /*
  * struct hmm_range - track invalidation lock on virtual address range
  *
- * @vma: the vm area struct for the range
- * @list: all range lock are on a list
+ * @notifier: a mmu_interval_notifier that includes the start/end
+ * @notifier_seq: result of mmu_interval_read_begin()
  * @start: range virtual start address (inclusive)
  * @end: range virtual end address (exclusive)
- * @pfns: array of pfns (big enough for the range)
- * @flags: pfn flags to match device driver page table
- * @values: pfn value for some special case (none, special, error, ...)
- * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
- * @valid: pfns array did not change since it has been fill by an HMM function
+ * @hmm_pfns: array of pfns (big enough for the range)
+ * @default_flags: default flags for the range (write, read, ... see hmm doc)
+ * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
+ * @dev_private_owner: owner of device private pages
  */
 struct hmm_range {
-        struct vm_area_struct *vma;
-        struct list_head list;
+        struct mmu_interval_notifier *notifier;
+        unsigned long notifier_seq;
         unsigned long start;
         unsigned long end;
-        uint64_t *pfns;
-        const uint64_t *flags;
-        const uint64_t *values;
-        uint8_t pfn_shift;
-        bool valid;
+        unsigned long *hmm_pfns;
+        unsigned long default_flags;
+        unsigned long pfn_flags_mask;
+        void *dev_private_owner;
 };
 
 /*
- * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn
- * @range: range use to decode HMM pfn value
- * @pfn: HMM pfn value to get corresponding struct page from
- * Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise
- *
- * If the HMM pfn is valid (ie valid flag set) then return the struct page
- * matching the pfn value stored in the HMM pfn. Otherwise return NULL.
+ * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
-static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
-                                           uint64_t pfn)
-{
-        if (pfn == range->values[HMM_PFN_NONE])
-                return NULL;
-        if (pfn == range->values[HMM_PFN_ERROR])
-                return NULL;
-        if (pfn == range->values[HMM_PFN_SPECIAL])
-                return NULL;
-        if (!(pfn & range->flags[HMM_PFN_VALID]))
-                return NULL;
-        return pfn_to_page(pfn >> range->pfn_shift);
-}
+int hmm_range_fault(struct hmm_range *range);
 
 /*
- * hmm_pfn_to_pfn() - return pfn value store in a HMM pfn
- * @range: range use to decode HMM pfn value
- * @pfn: HMM pfn value to extract pfn from
- * Returns: pfn value if HMM pfn is valid, -1UL otherwise
+ * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
+ *
+ * When waiting for mmu notifiers we need some kind of time out otherwise we
+ * could potentialy wait for ever, 1000ms ie 1s sounds like a long time to
+ * wait already.
  */
-static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
-                                           uint64_t pfn)
-{
-        if (pfn == range->values[HMM_PFN_NONE])
-                return -1UL;
-        if (pfn == range->values[HMM_PFN_ERROR])
-                return -1UL;
-        if (pfn == range->values[HMM_PFN_SPECIAL])
-                return -1UL;
-        if (!(pfn & range->flags[HMM_PFN_VALID]))
-                return -1UL;
-        return (pfn >> range->pfn_shift);
-}
-
-/*
- * hmm_pfn_from_page() - create a valid HMM pfn value from struct page
- * @range: range use to encode HMM pfn value
- * @page: struct page pointer for which to create the HMM pfn
- * Returns: valid HMM pfn for the page
- */
-static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
                                         struct page *page)
-{
-        return (page_to_pfn(page) << range->pfn_shift) |
-                range->flags[HMM_PFN_VALID];
-}
-
-/*
- * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn
- * @range: range use to encode HMM pfn value
- * @pfn: pfn value for which to create the HMM pfn
- * Returns: valid HMM pfn for the pfn
- */
-static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
-                                        unsigned long pfn)
-{
-        return (pfn << range->pfn_shift) |
-                range->flags[HMM_PFN_VALID];
-}
-
-
-#if IS_ENABLED(CONFIG_HMM_MIRROR)
-/*
- * Mirroring: how to synchronize device page table with CPU page table.
- *
- * A device driver that is participating in HMM mirroring must always
- * synchronize with CPU page table updates. For this, device drivers can either
- * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
- * drivers can decide to register one mirror per device per process, or just
- * one mirror per process for a group of devices. The pattern is:
- *
- * int device_bind_address_space(..., struct mm_struct *mm, ...)
- * {
- *      struct device_address_space *das;
- *
- *      // Device driver specific initialization, and allocation of das
- *      // which contains an hmm_mirror struct as one of its fields.
- *      ...
- *
- *      ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops);
- *      if (ret) {
- *          // Cleanup on error
- *          return ret;
- *      }
- *
- *      // Other device driver specific initialization
- *      ...
- * }
- *
- * Once an hmm_mirror is registered for an address space, the device driver
- * will get callbacks through sync_cpu_device_pagetables() operation (see
- * hmm_mirror_ops struct).
- *
- * Device driver must not free the struct containing the hmm_mirror struct
- * before calling hmm_mirror_unregister(). The expected usage is to do that when
- * the device driver is unbinding from an address space.
- *
- *
- * void device_unbind_address_space(struct device_address_space *das)
- * {
- *      // Device driver specific cleanup
- *      ...
- *
- *      hmm_mirror_unregister(&das->mirror);
- *
- *      // Other device driver specific cleanup, and now das can be freed
- *      ...
- * }
- */
-
-struct hmm_mirror;
-
-/*
- * enum hmm_update_type - type of update
- * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
- */
-enum hmm_update_type {
-        HMM_UPDATE_INVALIDATE,
-};
-
-/*
- * struct hmm_mirror_ops - HMM mirror device operations callback
- *
- * @update: callback to update range on a device
- */
-struct hmm_mirror_ops {
-        /* release() - release hmm_mirror
-         *
-         * @mirror: pointer to struct hmm_mirror
-         *
-         * This is called when the mm_struct is being released.
-         * The callback should make sure no references to the mirror occur
-         * after the callback returns.
-         */
-        void (*release)(struct hmm_mirror *mirror);
-
-        /* sync_cpu_device_pagetables() - synchronize page tables
-         *
-         * @mirror: pointer to struct hmm_mirror
-         * @update_type: type of update that occurred to the CPU page table
-         * @start: virtual start address of the range to update
-         * @end: virtual end address of the range to update
-         *
-         * This callback ultimately originates from mmu_notifiers when the CPU
-         * page table is updated. The device driver must update its page table
-         * in response to this callback. The update argument tells what action
-         * to perform.
-         *
-         * The device driver must not return from this callback until the device
-         * page tables are completely updated (TLBs flushed, etc); this is a
-         * synchronous call.
-         */
-        void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
-                                           enum hmm_update_type update_type,
-                                           unsigned long start,
-                                           unsigned long end);
-};
-
-/*
- * struct hmm_mirror - mirror struct for a device driver
- *
- * @hmm: pointer to struct hmm (which is unique per mm_struct)
- * @ops: device driver callback for HMM mirror operations
- * @list: for list of mirrors of a given mm
- *
- * Each address space (mm_struct) being mirrored by a device must register one
- * instance of an hmm_mirror struct with HMM. HMM will track the list of all
- * mirrors for each mm_struct.
- */
-struct hmm_mirror {
-        struct hmm *hmm;
-        const struct hmm_mirror_ops *ops;
-        struct list_head list;
-};
-
-int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
-void hmm_mirror_unregister(struct hmm_mirror *mirror);
-
-
-/*
- * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
- * driver lock that serializes device page table updates, then call
- * hmm_vma_range_done(), to check if the snapshot is still valid. The same
- * device driver page table update lock must also be used in the
- * hmm_mirror_ops.sync_cpu_device_pagetables() callback, so that CPU page
- * table invalidation serializes on it.
- *
- * YOU MUST CALL hmm_vma_range_done() ONCE AND ONLY ONCE EACH TIME YOU CALL
- * hmm_vma_get_pfns() WITHOUT ERROR !
- *
- * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID !
- */
-int hmm_vma_get_pfns(struct hmm_range *range);
-bool hmm_vma_range_done(struct hmm_range *range);
-
-
-/*
- * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
- * not migrate any device memory back to system memory. The HMM pfn array will
- * be updated with the fault result and current snapshot of the CPU page table
- * for the range.
- *
- * The mmap_sem must be taken in read mode before entering and it might be
- * dropped by the function if the block argument is false. In that case, the
- * function returns -EAGAIN.
- *
- * Return value does not reflect if the fault was successful for every single
- * address or not. Therefore, the caller must to inspect the HMM pfn array to
- * determine fault status for each address.
- *
- * Trying to fault inside an invalid vma will result in -EINVAL.
- *
- * See the function description in mm/hmm.c for further documentation.
- */
-int hmm_vma_fault(struct hmm_range *range, bool block);
-
-/* Below are for HMM internal use only! Not to be used by device driver! */
-void hmm_mm_destroy(struct mm_struct *mm);
-
-static inline void hmm_mm_init(struct mm_struct *mm)
-{
-        mm->hmm = NULL;
-}
-#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-static inline void hmm_mm_destroy(struct mm_struct *mm) {}
-static inline void hmm_mm_init(struct mm_struct *mm) {}
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-struct hmm_devmem;
-
-struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
-                                       unsigned long addr);
-
-/*
- * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events
- *
- * @free: call when refcount on page reach 1 and thus is no longer use
- * @fault: call when there is a page fault to unaddressable memory
- *
- * Both callback happens from page_free() and page_fault() callback of struct
- * dev_pagemap respectively. See include/linux/memremap.h for more details on
- * those.
- *
- * The hmm_devmem_ops callback are just here to provide a coherent and
- * uniq API to device driver and device driver should not register their
- * own page_free() or page_fault() but rely on the hmm_devmem_ops call-
- * back.
- */
-struct hmm_devmem_ops {
-        /*
-         * free() - free a device page
-         * @devmem: device memory structure (see struct hmm_devmem)
-         * @page: pointer to struct page being freed
-         *
-         * Call back occurs whenever a device page refcount reach 1 which
-         * means that no one is holding any reference on the page anymore
-         * (ZONE_DEVICE page have an elevated refcount of 1 as default so
-         * that they are not release to the general page allocator).
-         *
-         * Note that callback has exclusive ownership of the page (as no
-         * one is holding any reference).
-         */
-        void (*free)(struct hmm_devmem *devmem, struct page *page);
-        /*
-         * fault() - CPU page fault or get user page (GUP)
-         * @devmem: device memory structure (see struct hmm_devmem)
-         * @vma: virtual memory area containing the virtual address
-         * @addr: virtual address that faulted or for which there is a GUP
-         * @page: pointer to struct page backing virtual address (unreliable)
-         * @flags: FAULT_FLAG_* (see include/linux/mm.h)
-         * @pmdp: page middle directory
-         * Returns: VM_FAULT_MINOR/MAJOR on success or one of VM_FAULT_ERROR
-         *   on error
-         *
-         * The callback occurs whenever there is a CPU page fault or GUP on a
-         * virtual address. This means that the device driver must migrate the
-         * page back to regular memory (CPU accessible).
-         *
-         * The device driver is free to migrate more than one page from the
-         * fault() callback as an optimization. However if device decide to
-         * migrate more than one page it must always priotirize the faulting
-         * address over the others.
-         *
-         * The struct page pointer is only given as an hint to allow quick
-         * lookup of internal device driver data. A concurrent migration
-         * might have already free that page and the virtual address might
-         * not longer be back by it. So it should not be modified by the
-         * callback.
-         *
-         * Note that mmap semaphore is held in read mode at least when this
-         * callback occurs, hence the vma is valid upon callback entry.
-         */
-        int (*fault)(struct hmm_devmem *devmem,
-                     struct vm_area_struct *vma,
-                     unsigned long addr,
-                     const struct page *page,
-                     unsigned int flags,
-                     pmd_t *pmdp);
-};
-
-/*
- * struct hmm_devmem - track device memory
- *
- * @completion: completion object for device memory
- * @pfn_first: first pfn for this resource (set by hmm_devmem_add())
- * @pfn_last: last pfn for this resource (set by hmm_devmem_add())
- * @resource: IO resource reserved for this chunk of memory
- * @pagemap: device page map for that chunk
- * @device: device to bind resource to
- * @ops: memory operations callback
- * @ref: per CPU refcount
- *
- * This an helper structure for device drivers that do not wish to implement
- * the gory details related to hotplugging new memoy and allocating struct
- * pages.
- *
- * Device drivers can directly use ZONE_DEVICE memory on their own if they
- * wish to do so.
- */
-struct hmm_devmem {
-        struct completion completion;
-        unsigned long pfn_first;
-        unsigned long pfn_last;
-        struct resource *resource;
-        struct device *device;
-        struct dev_pagemap pagemap;
-        const struct hmm_devmem_ops *ops;
-        struct percpu_ref ref;
-};
-
-/*
- * To add (hotplug) device memory, HMM assumes that there is no real resource
- * that reserves a range in the physical address space (this is intended to be
- * use by unaddressable device memory). It will reserve a physical range big
- * enough and allocate struct page for it.
- *
- * The device driver can wrap the hmm_devmem struct inside a private device
- * driver struct.
- */
-struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
-                                  struct device *device,
-                                  unsigned long size);
-struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
-                                           struct device *device,
-                                           struct resource *res);
-
-/*
- * hmm_devmem_page_set_drvdata - set per-page driver data field
- *
- * @page: pointer to struct page
- * @data: driver data value to set
- *
- * Because page can not be on lru we have an unsigned long that driver can use
- * to store a per page field. This just a simple helper to do that.
- */
-static inline void hmm_devmem_page_set_drvdata(struct page *page,
-                                               unsigned long data)
-{
-        page->hmm_data = data;
-}
-
-/*
- * hmm_devmem_page_get_drvdata - get per page driver data field
- *
- * @page: pointer to struct page
- * Return: driver data value
- */
-static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page)
-{
-        return page->hmm_data;
-}
-
-
-/*
- * struct hmm_device - fake device to hang device memory onto
- *
- * @device: device struct
- * @minor: device minor number
- */
-struct hmm_device {
-        struct device device;
-        unsigned int minor;
-};
-
-/*
- * A device driver that wants to handle multiple devices memory through a
- * single fake device can use hmm_device to do so. This is purely a helper and
- * it is not strictly needed, in order to make use of any HMM functionality.
- */
-struct hmm_device *hmm_device_new(void *drvdata);
-void hmm_device_put(struct hmm_device *hmm_device);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-#else /* IS_ENABLED(CONFIG_HMM) */
-static inline void hmm_mm_destroy(struct mm_struct *mm) {}
-static inline void hmm_mm_init(struct mm_struct *mm) {}
-#endif /* IS_ENABLED(CONFIG_HMM) */
+#define HMM_RANGE_DEFAULT_TIMEOUT 1000
 
 #endif /* LINUX_HMM_H */
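
For context, the usage pattern the new API expects (as described in Documentation/vm/hmm.rst) is: snapshot the CPU page table under mmap_read_lock() with hmm_range_fault(), then take the driver's own page-table lock and throw the snapshot away if mmu_interval_read_retry() reports that an invalidation raced with the walk. Below is a minimal sketch of that loop against the header above; device_map_range(), device_update_page_tables() and device_lock are hypothetical driver-side placeholders, not part of the HMM API.

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/mutex.h>
#include <linux/slab.h>

static DEFINE_MUTEX(device_lock);       /* placeholder: also taken by the driver's invalidate callback */

int device_update_page_tables(struct hmm_range *range);        /* placeholder: driver specific */

int device_map_range(struct mmu_interval_notifier *notifier, struct mm_struct *mm,
                     unsigned long start, unsigned long npages)
{
        struct hmm_range range = {
                .notifier = notifier,
                .start = start,
                .end = start + npages * PAGE_SIZE,
                .default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
                .pfn_flags_mask = 0,            /* only default_flags matter */
                .dev_private_owner = NULL,      /* no DEVICE_PRIVATE pages in this sketch */
        };
        int ret;

        range.hmm_pfns = kvmalloc_array(npages, sizeof(*range.hmm_pfns), GFP_KERNEL);
        if (!range.hmm_pfns)
                return -ENOMEM;

again:
        range.notifier_seq = mmu_interval_read_begin(range.notifier);
        mmap_read_lock(mm);
        ret = hmm_range_fault(&range);
        mmap_read_unlock(mm);
        if (ret) {
                if (ret == -EBUSY)
                        goto again;     /* collided with an invalidation, retry */
                goto out;
        }

        /*
         * Serialize against the invalidate callback before consuming the
         * snapshot; if an invalidation raced with the fault, start over.
         */
        mutex_lock(&device_lock);
        if (mmu_interval_read_retry(range.notifier, range.notifier_seq)) {
                mutex_unlock(&device_lock);
                goto again;
        }
        /* hmm_pfns[] now holds HMM_PFN_VALID entries, see hmm_pfn_to_page() */
        ret = device_update_page_tables(&range);
        mutex_unlock(&device_lock);
out:
        kvfree(range.hmm_pfns);
        return ret;
}

The lock taken around mmu_interval_read_retry() must be the same one the driver's mmu_interval_notifier invalidate callback takes, so a concurrent CPU page table change either forces the retry above or waits until the device page tables have been updated.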