2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -21,49 +21,25 @@
  *
  */
 
-#include <drm/drmP.h>
+#include <linux/dma-mapping.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
-#include "amdgpu_amdkfd.h"
-
-/**
- * amdgpu_ih_ring_alloc - allocate memory for the IH ring
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate a ring buffer for the interrupt controller.
- * Returns 0 for success, errors for failure.
- */
-static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev)
-{
-	int r;
-
-	/* Allocate ring buffer */
-	if (adev->irq.ih.ring_obj == NULL) {
-		r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size,
-					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
-					    &adev->irq.ih.ring_obj,
-					    &adev->irq.ih.gpu_addr,
-					    (void **)&adev->irq.ih.ring);
-		if (r) {
-			DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r);
-			return r;
-		}
-	}
-	return 0;
-}
 
 /**
  * amdgpu_ih_ring_init - initialize the IH state
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to initialize
+ * @ring_size: ring size to allocate
+ * @use_bus_addr: true when we can use dma_alloc_coherent
  *
  * Initializes the IH state and allocates a buffer
  * for the IH ring buffer.
  * Returns 0 for success, errors for failure.
  */
-int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
-			bool use_bus_addr)
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+			unsigned ring_size, bool use_bus_addr)
 {
 	u32 rb_bufsz;
 	int r;
@@ -71,70 +47,87 @@
 	/* Align ring size */
 	rb_bufsz = order_base_2(ring_size / 4);
 	ring_size = (1 << rb_bufsz) * 4;
-	adev->irq.ih.ring_size = ring_size;
-	adev->irq.ih.ptr_mask = adev->irq.ih.ring_size - 1;
-	adev->irq.ih.rptr = 0;
-	adev->irq.ih.use_bus_addr = use_bus_addr;
+	ih->ring_size = ring_size;
+	ih->ptr_mask = ih->ring_size - 1;
+	ih->rptr = 0;
+	ih->use_bus_addr = use_bus_addr;
 
-	if (adev->irq.ih.use_bus_addr) {
-		if (!adev->irq.ih.ring) {
-			/* add 8 bytes for the rptr/wptr shadows and
-			 * add them to the end of the ring allocation.
-			 */
-			adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
-								 adev->irq.ih.ring_size + 8,
-								 &adev->irq.ih.rb_dma_addr);
-			if (adev->irq.ih.ring == NULL)
-				return -ENOMEM;
-			memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
-			adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
-			adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
-		}
-		return 0;
+	if (use_bus_addr) {
+		dma_addr_t dma_addr;
+
+		if (ih->ring)
+			return 0;
+
+		/* add 8 bytes for the rptr/wptr shadows and
+		 * add them to the end of the ring allocation.
+		 */
+		ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
+					      &dma_addr, GFP_KERNEL);
+		if (ih->ring == NULL)
+			return -ENOMEM;
+
+		ih->gpu_addr = dma_addr;
+		ih->wptr_addr = dma_addr + ih->ring_size;
+		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
+		ih->rptr_addr = dma_addr + ih->ring_size + 4;
+		ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
 	} else {
-		r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs);
+		unsigned wptr_offs, rptr_offs;
+
+		r = amdgpu_device_wb_get(adev, &wptr_offs);
+		if (r)
+			return r;
+
+		r = amdgpu_device_wb_get(adev, &rptr_offs);
 		if (r) {
-			dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r);
+			amdgpu_device_wb_free(adev, wptr_offs);
 			return r;
 		}
 
-		r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs);
+		r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_GTT,
+					    &ih->ring_obj, &ih->gpu_addr,
+					    (void **)&ih->ring);
 		if (r) {
-			amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
-			dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r);
+			amdgpu_device_wb_free(adev, rptr_offs);
+			amdgpu_device_wb_free(adev, wptr_offs);
 			return r;
 		}
 
-		return amdgpu_ih_ring_alloc(adev);
+		ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
+		ih->wptr_cpu = &adev->wb.wb[wptr_offs];
+		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
+		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
 	}
+	return 0;
 }
 
 /**
  * amdgpu_ih_ring_fini - tear down the IH state
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to tear down
  *
  * Tears down the IH state and frees buffer
  * used for the IH ring buffer.
  */
-void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-	if (adev->irq.ih.use_bus_addr) {
-		if (adev->irq.ih.ring) {
-			/* add 8 bytes for the rptr/wptr shadows and
-			 * add them to the end of the ring allocation.
-			 */
-			pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
-					    (void *)adev->irq.ih.ring,
-					    adev->irq.ih.rb_dma_addr);
-			adev->irq.ih.ring = NULL;
-		}
+	if (ih->use_bus_addr) {
+		if (!ih->ring)
+			return;
+
+		/* add 8 bytes for the rptr/wptr shadows and
+		 * add them to the end of the ring allocation.
+		 */
+		dma_free_coherent(adev->dev, ih->ring_size + 8,
+				  (void *)ih->ring, ih->gpu_addr);
+		ih->ring = NULL;
 	} else {
-		amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj,
-				      &adev->irq.ih.gpu_addr,
-				      (void **)&adev->irq.ih.ring);
-		amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
-		amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs);
+		amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
+				      (void **)&ih->ring);
+		amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
+		amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
 	}
 }
 
@@ -142,133 +135,44 @@
  * amdgpu_ih_process - interrupt handler
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
  *
  * Interrupt hander (VI), walk the IH ring.
  * Returns irq process return code.
  */
-int amdgpu_ih_process(struct amdgpu_device *adev)
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-	struct amdgpu_iv_entry entry;
+	unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
 	u32 wptr;
 
-	if (!adev->irq.ih.enabled || adev->shutdown)
+	if (!ih->enabled || adev->shutdown)
 		return IRQ_NONE;
 
-	wptr = amdgpu_ih_get_wptr(adev);
+	wptr = amdgpu_ih_get_wptr(adev, ih);
 
 restart_ih:
 	/* is somebody else already processing irqs? */
-	if (atomic_xchg(&adev->irq.ih.lock, 1))
+	if (atomic_xchg(&ih->lock, 1))
 		return IRQ_NONE;
 
-	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, adev->irq.ih.rptr, wptr);
+	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
 
-	while (adev->irq.ih.rptr != wptr) {
-		u32 ring_index = adev->irq.ih.rptr >> 2;
-
-		/* Prescreening of high-frequency interrupts */
-		if (!amdgpu_ih_prescreen_iv(adev)) {
-			adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
-			continue;
-		}
-
-		/* Before dispatching irq to IP blocks, send it to amdkfd */
-		amdgpu_amdkfd_interrupt(adev,
-				(const void *) &adev->irq.ih.ring[ring_index]);
-
-		entry.iv_entry = (const uint32_t *)
-			&adev->irq.ih.ring[ring_index];
-		amdgpu_ih_decode_iv(adev, &entry);
-		adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
-
-		amdgpu_irq_dispatch(adev, &entry);
+	while (ih->rptr != wptr && --count) {
+		amdgpu_irq_dispatch(adev, ih);
+		ih->rptr &= ih->ptr_mask;
 	}
-	amdgpu_ih_set_rptr(adev);
-	atomic_set(&adev->irq.ih.lock, 0);
+
+	amdgpu_ih_set_rptr(adev, ih);
+	atomic_set(&ih->lock, 0);
 
 	/* make sure wptr hasn't changed while processing */
-	wptr = amdgpu_ih_get_wptr(adev);
-	if (wptr != adev->irq.ih.rptr)
+	wptr = amdgpu_ih_get_wptr(adev, ih);
+	if (wptr != ih->rptr)
 		goto restart_ih;
 
 	return IRQ_HANDLED;
 }
 
-/**
- * amdgpu_ih_add_fault - Add a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a retry page fault interrupt is
- * received. If this is a new page fault, it will be added to a hash
- * table. The return value indicates whether this is a new fault, or
- * a fault that was already known and is already being handled.
- *
- * If there are too many pending page faults, this will fail. Retry
- * interrupts should be ignored in this case until there is enough
- * free space.
- *
- * Returns 0 if the fault was added, 1 if the fault was already known,
- * -ENOSPC if there are too many pending faults.
- */
-int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
-{
-	unsigned long flags;
-	int r = -ENOSPC;
-
-	if (WARN_ON_ONCE(!adev->irq.ih.faults))
-		/* Should be allocated in <IP>_ih_sw_init on GPUs that
-		 * support retry faults and require retry filtering.
-		 */
-		return r;
-
-	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
-	/* Only let the hash table fill up to 50% for best performance */
-	if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
-		goto unlock_out;
-
-	r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
-	if (!r)
-		adev->irq.ih.faults->count++;
-
-	/* chash_table_copy_in should never fail unless we're losing count */
-	WARN_ON_ONCE(r < 0);
-
-unlock_out:
-	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
-	return r;
-}
-
-/**
- * amdgpu_ih_clear_fault - Remove a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a page fault has been handled. Any
- * future interrupt with this key will be processed as a new
- * page fault.
- */
-void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
-{
-	unsigned long flags;
-	int r;
-
-	if (!adev->irq.ih.faults)
-		return;
-
-	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
-	r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
-	if (!WARN_ON_ONCE(r < 0)) {
-		adev->irq.ih.faults->count--;
-		WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
-	}
-
-	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
-}
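
After this patch the IH helpers take an explicit struct amdgpu_ih_ring instead of always operating on adev->irq.ih, so IP-level code passes in whichever ring it wants to service. The sketch below illustrates the new calling convention only; the example_* function names and the 64 KiB ring size are assumptions made for the illustration and are not part of this patch.

/* Minimal sketch of a caller using the refactored per-ring API.
 * Assumptions: adev->irq.ih is the default IH ring, the ring size
 * and the example_* wrappers are illustrative, not from the patch.
 */
#include "amdgpu.h"
#include "amdgpu_ih.h"

static int example_ih_sw_init(struct amdgpu_device *adev)
{
	/* allocate a 64 KiB ring; use_bus_addr = true selects the
	 * dma_alloc_coherent() path instead of a GTT buffer object
	 */
	return amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
}

static void example_ih_sw_fini(struct amdgpu_device *adev)
{
	/* frees either the coherent buffer or the BO plus writeback
	 * slots, depending on how the ring was initialized
	 */
	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
}

static int example_ih_irq_handler(struct amdgpu_device *adev)
{
	/* walk the selected ring; returns IRQ_HANDLED or IRQ_NONE */
	return amdgpu_ih_process(adev, &adev->irq.ih);
}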