@@ -1,10 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
+#include <linux/range.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
-
-#include <asm/pgtable.h>
 
 struct resource;
 struct device;
@@ -19,6 +18,7 @@
  */
 struct vmem_altmap {
 	const unsigned long base_pfn;
+	const unsigned long end_pfn;
 	const unsigned long reserve;
 	unsigned long free;
 	unsigned long align;
@@ -39,13 +39,6 @@
  * A more complete discussion of unaddressable memory may be found in
  * include/linux/hmm.h and Documentation/vm/hmm.rst.
  *
- * MEMORY_DEVICE_PUBLIC:
- * Device memory that is cache coherent from device and CPU point of view. This
- * is use on platform that have an advance system bus (like CAPI or CCIX). A
- * driver can hotplug the device memory using ZONE_DEVICE and with that memory
- * type. Any page of a process can be migrated to such memory. However no one
- * should be allow to pin such memory so that it can always be evicted.
- *
  * MEMORY_DEVICE_FS_DAX:
  * Host memory that has similar access semantics as System RAM i.e. DMA
  * coherent and supports page pinning. In support of coordinating page
@@ -53,84 +46,102 @@
  * wakeup event whenever a page is unpinned and becomes idle. This
  * wakeup is used to coordinate physical address space management (ex:
  * fs truncate/hole punch) vs pinned pages (ex: device dma).
+ *
+ * MEMORY_DEVICE_GENERIC:
+ * Host memory that has similar access semantics as System RAM i.e. DMA
+ * coherent and supports page pinning. This is for example used by DAX devices
+ * that expose memory using a character device.
+ *
+ * MEMORY_DEVICE_PCI_P2PDMA:
+ * Device memory residing in a PCI BAR intended for use with Peer-to-Peer
+ * transactions.
  */
 enum memory_type {
+	/* 0 is reserved to catch uninitialized type fields */
 	MEMORY_DEVICE_PRIVATE = 1,
-	MEMORY_DEVICE_PUBLIC,
 	MEMORY_DEVICE_FS_DAX,
+	MEMORY_DEVICE_GENERIC,
+	MEMORY_DEVICE_PCI_P2PDMA,
 };
 
-/*
- * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two
- * callbacks:
- *   page_fault()
- *   page_free()
- *
- * Additional notes about MEMORY_DEVICE_PRIVATE may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief
- * explanation in include/linux/memory_hotplug.h.
- *
- * The page_fault() callback must migrate page back, from device memory to
- * system memory, so that the CPU can access it. This might fail for various
- * reasons (device issues, device have been unplugged, ...). When such error
- * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and
- * set the CPU page table entry to "poisoned".
- *
- * Note that because memory cgroup charges are transferred to the device memory,
- * this should never fail due to memory restrictions. However, allocation
- * of a regular system page might still fail because we are out of memory. If
- * that happens, the page_fault() callback must return VM_FAULT_OOM.
- *
- * The page_fault() callback can also try to migrate back multiple pages in one
- * chunk, as an optimization. It must, however, prioritize the faulting address
- * over all the others.
- *
- *
- * The page_free() callback is called once the page refcount reaches 1
- * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
- * This allows the device driver to implement its own memory management.)
- *
- * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter.
- */
-typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
-				unsigned long addr,
-				const struct page *page,
-				unsigned int flags,
-				pmd_t *pmdp);
-typedef void (*dev_page_free_t)(struct page *page, void *data);
+struct dev_pagemap_ops {
+	/*
+	 * Called once the page refcount reaches 1. (ZONE_DEVICE pages never
+	 * reach 0 refcount unless there is a refcount bug. This allows the
+	 * device driver to implement its own memory management.)
+	 */
+	void (*page_free)(struct page *page);
+
+	/*
+	 * Transition the refcount in struct dev_pagemap to the dead state.
+	 */
+	void (*kill)(struct dev_pagemap *pgmap);
+
+	/*
+	 * Wait for refcount in struct dev_pagemap to be idle and reap it.
+	 */
+	void (*cleanup)(struct dev_pagemap *pgmap);
+
+	/*
+	 * Used for private (un-addressable) device memory only. Must migrate
+	 * the page back to a CPU accessible page.
+	 */
+	vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);
+};
+
+#define PGMAP_ALTMAP_VALID (1 << 0)
 
 /**
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
- * @page_fault: callback when CPU fault on an unaddressable device page
- * @page_free: free page callback when page refcount reaches 1
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
- * @res: physical address range covered by @ref
  * @ref: reference count that pins the devm_memremap_pages() mapping
- * @kill: callback to transition @ref to the dead state
- * @dev: host device of the mapping for debug
- * @data: private data pointer for page_free()
+ * @internal_ref: internal reference if @ref is not provided by the caller
+ * @done: completion for @internal_ref
  * @type: memory type: see MEMORY_* in memory_hotplug.h
+ * @flags: PGMAP_* flags to specify detailed behavior
+ * @ops: method table
+ * @owner: an opaque pointer identifying the entity that manages this
+ *	instance. Used by various helpers to make sure that no
+ *	foreign ZONE_DEVICE memory is accessed.
+ * @nr_range: number of ranges to be mapped
+ * @range: range to be mapped when nr_range == 1
+ * @ranges: array of ranges to be mapped when nr_range > 1
  */
 struct dev_pagemap {
-	dev_page_fault_t page_fault;
-	dev_page_free_t page_free;
 	struct vmem_altmap altmap;
-	bool altmap_valid;
-	struct resource res;
 	struct percpu_ref *ref;
-	void (*kill)(struct percpu_ref *ref);
-	struct device *dev;
-	void *data;
+	struct percpu_ref internal_ref;
+	struct completion done;
 	enum memory_type type;
+	unsigned int flags;
+	const struct dev_pagemap_ops *ops;
+	void *owner;
+	int nr_range;
+	union {
+		struct range range;
+		struct range ranges[0];
+	};
 };
 
+static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
+{
+	if (pgmap->flags & PGMAP_ALTMAP_VALID)
+		return &pgmap->altmap;
+	return NULL;
+}
+
 #ifdef CONFIG_ZONE_DEVICE
+void *memremap_pages(struct dev_pagemap *pgmap, int nid);
+void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
 struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 		struct dev_pagemap *pgmap);
+bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
 
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
+unsigned long memremap_compat_align(void);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
 		struct dev_pagemap *pgmap)
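
For illustration only, not part of the patch: a minimal sketch of how a driver might consume the reworked interface above, assuming the MEMORY_DEVICE_GENERIC case. Every my_* identifier is hypothetical; no @ref is supplied, so devm_memremap_pages() falls back to the internal_ref/done pair documented in struct dev_pagemap.

/* Hypothetical driver fragment; all my_* names are invented for this sketch. */
#include <linux/device.h>
#include <linux/err.h>
#include <linux/memremap.h>

static void my_page_free(struct page *page)
{
	/* Hand the now-idle page back to the driver's own allocator. */
}

static const struct dev_pagemap_ops my_pagemap_ops = {
	.page_free = my_page_free,
};

static int my_probe(struct device *dev, struct resource *res)
{
	struct dev_pagemap *pgmap;
	void *addr;

	pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_GENERIC;
	pgmap->ops = &my_pagemap_ops;
	pgmap->nr_range = 1;
	pgmap->range.start = res->start;
	pgmap->range.end = res->end;

	/* Maps the range and arranges teardown on driver detach. */
	addr = devm_memremap_pages(dev, pgmap);
	return PTR_ERR_OR_ZERO(addr);
}

devm_memunmap_pages() is only needed if the mapping must be torn down before the device is detached; otherwise the devm action added by devm_memremap_pages() handles it.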
@@ -144,10 +155,20 @@
 	return ERR_PTR(-ENXIO);
 }
 
+static inline void devm_memunmap_pages(struct device *dev,
+		struct dev_pagemap *pgmap)
+{
+}
+
 static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 		struct dev_pagemap *pgmap)
 {
 	return NULL;
+}
+
+static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
+{
+	return false;
 }
 
 static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
@@ -159,6 +180,12 @@
 		unsigned long nr_pfns)
 {
 }
+
+/* when memremap_pages() is disabled all archs can remap a single page */
+static inline unsigned long memremap_compat_align(void)
+{
+	return PAGE_SIZE;
+}
 #endif /* CONFIG_ZONE_DEVICE */
 
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
@@ -166,4 +193,5 @@
 	if (pgmap)
 		percpu_ref_put(pgmap->ref);
 }
+
 #endif /* _LINUX_MEMREMAP_H_ */
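
A similarly hypothetical sketch of the lookup side: get_dev_pagemap() takes a reference on the pagemap covering a pfn, the @owner field together with pgmap_pfn_valid() lets a caller reject foreign ZONE_DEVICE memory, and put_dev_pagemap() drops the reference again. my_pfn_is_ours is an invented name.

#include <linux/memremap.h>

/* Hypothetical helper: does @pfn belong to a ZONE_DEVICE range we manage? */
static bool my_pfn_is_ours(unsigned long pfn, void *my_owner)
{
	struct dev_pagemap *pgmap;
	bool ours;

	pgmap = get_dev_pagemap(pfn, NULL);
	if (!pgmap)
		return false;

	ours = pgmap->owner == my_owner && pgmap_pfn_valid(pgmap, pfn);
	put_dev_pagemap(pgmap);	/* drop the reference taken by the lookup */
	return ours;
}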
---|