hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/include/linux/memremap.h
....@@ -1,10 +1,9 @@
11 /* SPDX-License-Identifier: GPL-2.0 */
22 #ifndef _LINUX_MEMREMAP_H_
33 #define _LINUX_MEMREMAP_H_
4
+#include <linux/range.h>
45 #include <linux/ioport.h>
56 #include <linux/percpu-refcount.h>
6
-
7
-#include <asm/pgtable.h>
87
98 struct resource;
109 struct device;
....@@ -19,6 +18,7 @@
1918 */
2019 struct vmem_altmap {
2120 const unsigned long base_pfn;
21
+ const unsigned long end_pfn;
2222 const unsigned long reserve;
2323 unsigned long free;
2424 unsigned long align;
....@@ -39,13 +39,6 @@
3939 * A more complete discussion of unaddressable memory may be found in
4040 * include/linux/hmm.h and Documentation/vm/hmm.rst.
4141 *
42
- * MEMORY_DEVICE_PUBLIC:
43
- * Device memory that is cache coherent from device and CPU point of view. This
44
- * is use on platform that have an advance system bus (like CAPI or CCIX). A
45
- * driver can hotplug the device memory using ZONE_DEVICE and with that memory
46
- * type. Any page of a process can be migrated to such memory. However no one
47
- * should be allow to pin such memory so that it can always be evicted.
48
- *
4942 * MEMORY_DEVICE_FS_DAX:
5043 * Host memory that has similar access semantics as System RAM i.e. DMA
5144 * coherent and supports page pinning. In support of coordinating page
....@@ -53,84 +46,102 @@
5346 * wakeup event whenever a page is unpinned and becomes idle. This
5447 * wakeup is used to coordinate physical address space management (ex:
5548 * fs truncate/hole punch) vs pinned pages (ex: device dma).
49
+ *
50
+ * MEMORY_DEVICE_GENERIC:
51
+ * Host memory that has similar access semantics as System RAM i.e. DMA
52
+ * coherent and supports page pinning. This is for example used by DAX devices
53
+ * that expose memory using a character device.
54
+ *
55
+ * MEMORY_DEVICE_PCI_P2PDMA:
56
+ * Device memory residing in a PCI BAR intended for use with Peer-to-Peer
57
+ * transactions.
5658 */
5759 enum memory_type {
60
+ /* 0 is reserved to catch uninitialized type fields */
5861 MEMORY_DEVICE_PRIVATE = 1,
59
- MEMORY_DEVICE_PUBLIC,
6062 MEMORY_DEVICE_FS_DAX,
63
+ MEMORY_DEVICE_GENERIC,
64
+ MEMORY_DEVICE_PCI_P2PDMA,
6165 };
6266
63
-/*
64
- * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two
65
- * callbacks:
66
- * page_fault()
67
- * page_free()
68
- *
69
- * Additional notes about MEMORY_DEVICE_PRIVATE may be found in
70
- * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief
71
- * explanation in include/linux/memory_hotplug.h.
72
- *
73
- * The page_fault() callback must migrate page back, from device memory to
74
- * system memory, so that the CPU can access it. This might fail for various
75
- * reasons (device issues, device have been unplugged, ...). When such error
76
- * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and
77
- * set the CPU page table entry to "poisoned".
78
- *
79
- * Note that because memory cgroup charges are transferred to the device memory,
80
- * this should never fail due to memory restrictions. However, allocation
81
- * of a regular system page might still fail because we are out of memory. If
82
- * that happens, the page_fault() callback must return VM_FAULT_OOM.
83
- *
84
- * The page_fault() callback can also try to migrate back multiple pages in one
85
- * chunk, as an optimization. It must, however, prioritize the faulting address
86
- * over all the others.
87
- *
88
- *
89
- * The page_free() callback is called once the page refcount reaches 1
90
- * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
91
- * This allows the device driver to implement its own memory management.)
92
- *
93
- * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter.
94
- */
95
-typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
96
- unsigned long addr,
97
- const struct page *page,
98
- unsigned int flags,
99
- pmd_t *pmdp);
100
-typedef void (*dev_page_free_t)(struct page *page, void *data);
67
+struct dev_pagemap_ops {
68
+ /*
69
+ * Called once the page refcount reaches 1. (ZONE_DEVICE pages never
70
+ * reach 0 refcount unless there is a refcount bug. This allows the
71
+ * device driver to implement its own memory management.)
72
+ */
73
+ void (*page_free)(struct page *page);
74
+
75
+ /*
76
+ * Transition the refcount in struct dev_pagemap to the dead state.
77
+ */
78
+ void (*kill)(struct dev_pagemap *pgmap);
79
+
80
+ /*
81
+ * Wait for refcount in struct dev_pagemap to be idle and reap it.
82
+ */
83
+ void (*cleanup)(struct dev_pagemap *pgmap);
84
+
85
+ /*
86
+ * Used for private (un-addressable) device memory only. Must migrate
87
+ * the page back to a CPU accessible page.
88
+ */
89
+ vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);
90
+};
91
+
92
+#define PGMAP_ALTMAP_VALID (1 << 0)
10193
10294 /**
10395 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
104
- * @page_fault: callback when CPU fault on an unaddressable device page
105
- * @page_free: free page callback when page refcount reaches 1
10696 * @altmap: pre-allocated/reserved memory for vmemmap allocations
107
- * @res: physical address range covered by @ref
10897 * @ref: reference count that pins the devm_memremap_pages() mapping
109
- * @kill: callback to transition @ref to the dead state
110
- * @dev: host device of the mapping for debug
111
- * @data: private data pointer for page_free()
98
+ * @internal_ref: internal reference if @ref is not provided by the caller
99
+ * @done: completion for @internal_ref
112100 * @type: memory type: see MEMORY_* in memory_hotplug.h
101
+ * @flags: PGMAP_* flags to specify detailed behavior
102
+ * @ops: method table
103
+ * @owner: an opaque pointer identifying the entity that manages this
104
+ * instance. Used by various helpers to make sure that no
105
+ * foreign ZONE_DEVICE memory is accessed.
106
+ * @nr_range: number of ranges to be mapped
107
+ * @range: range to be mapped when nr_range == 1
108
+ * @ranges: array of ranges to be mapped when nr_range > 1
113109 */
114110 struct dev_pagemap {
115
- dev_page_fault_t page_fault;
116
- dev_page_free_t page_free;
117111 struct vmem_altmap altmap;
118
- bool altmap_valid;
119
- struct resource res;
120112 struct percpu_ref *ref;
121
- void (*kill)(struct percpu_ref *ref);
122
- struct device *dev;
123
- void *data;
113
+ struct percpu_ref internal_ref;
114
+ struct completion done;
124115 enum memory_type type;
116
+ unsigned int flags;
117
+ const struct dev_pagemap_ops *ops;
118
+ void *owner;
119
+ int nr_range;
120
+ union {
121
+ struct range range;
122
+ struct range ranges[0];
123
+ };
125124 };
126125
126
+static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
127
+{
128
+ if (pgmap->flags & PGMAP_ALTMAP_VALID)
129
+ return &pgmap->altmap;
130
+ return NULL;
131
+}
132
+
127133 #ifdef CONFIG_ZONE_DEVICE
134
+void *memremap_pages(struct dev_pagemap *pgmap, int nid);
135
+void memunmap_pages(struct dev_pagemap *pgmap);
128136 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
137
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
129138 struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
130139 struct dev_pagemap *pgmap);
140
+bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
131141
132142 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
133143 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
144
+unsigned long memremap_compat_align(void);
134145 #else
135146 static inline void *devm_memremap_pages(struct device *dev,
136147 struct dev_pagemap *pgmap)
....@@ -144,10 +155,20 @@
144155 return ERR_PTR(-ENXIO);
145156 }
146157
158
+static inline void devm_memunmap_pages(struct device *dev,
159
+ struct dev_pagemap *pgmap)
160
+{
161
+}
162
+
147163 static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
148164 struct dev_pagemap *pgmap)
149165 {
150166 return NULL;
167
+}
168
+
169
+static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
170
+{
171
+ return false;
151172 }
152173
153174 static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
....@@ -159,6 +180,12 @@
159180 unsigned long nr_pfns)
160181 {
161182 }
183
+
184
+/* when memremap_pages() is disabled all archs can remap a single page */
185
+static inline unsigned long memremap_compat_align(void)
186
+{
187
+ return PAGE_SIZE;
188
+}
162189 #endif /* CONFIG_ZONE_DEVICE */
163190
164191 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
....@@ -166,4 +193,5 @@
166193 if (pgmap)
167194 percpu_ref_put(pgmap->ref);
168195 }
196
+
169197 #endif /* _LINUX_MEMREMAP_H_ */