@@ -21,49 +21,25 @@
  *
  */
 
-#include <drm/drmP.h>
+#include <linux/dma-mapping.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
-#include "amdgpu_amdkfd.h"
-
-/**
- * amdgpu_ih_ring_alloc - allocate memory for the IH ring
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate a ring buffer for the interrupt controller.
- * Returns 0 for success, errors for failure.
- */
-static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev)
-{
-	int r;
-
-	/* Allocate ring buffer */
-	if (adev->irq.ih.ring_obj == NULL) {
-		r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size,
-					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
-					    &adev->irq.ih.ring_obj,
-					    &adev->irq.ih.gpu_addr,
-					    (void **)&adev->irq.ih.ring);
-		if (r) {
-			DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r);
-			return r;
-		}
-	}
-	return 0;
-}
 
 /**
  * amdgpu_ih_ring_init - initialize the IH state
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to initialize
+ * @ring_size: ring size to allocate
+ * @use_bus_addr: true when we can use dma_alloc_coherent
  *
  * Initializes the IH state and allocates a buffer
  * for the IH ring buffer.
  * Returns 0 for success, errors for failure.
  */
-int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
-			bool use_bus_addr)
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+			unsigned ring_size, bool use_bus_addr)
 {
 	u32 rb_bufsz;
 	int r;
@@ -71,70 +47,87 @@
 	/* Align ring size */
 	rb_bufsz = order_base_2(ring_size / 4);
 	ring_size = (1 << rb_bufsz) * 4;
-	adev->irq.ih.ring_size = ring_size;
-	adev->irq.ih.ptr_mask = adev->irq.ih.ring_size - 1;
-	adev->irq.ih.rptr = 0;
-	adev->irq.ih.use_bus_addr = use_bus_addr;
+	ih->ring_size = ring_size;
+	ih->ptr_mask = ih->ring_size - 1;
+	ih->rptr = 0;
+	ih->use_bus_addr = use_bus_addr;
 
-	if (adev->irq.ih.use_bus_addr) {
-		if (!adev->irq.ih.ring) {
-			/* add 8 bytes for the rptr/wptr shadows and
-			 * add them to the end of the ring allocation.
-			 */
-			adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
-								 adev->irq.ih.ring_size + 8,
-								 &adev->irq.ih.rb_dma_addr);
-			if (adev->irq.ih.ring == NULL)
-				return -ENOMEM;
-			memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
-			adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
-			adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
-		}
-		return 0;
+	if (use_bus_addr) {
+		dma_addr_t dma_addr;
+
+		if (ih->ring)
+			return 0;
+
+		/* add 8 bytes for the rptr/wptr shadows and
+		 * add them to the end of the ring allocation.
+		 */
+		ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
+					      &dma_addr, GFP_KERNEL);
+		if (ih->ring == NULL)
+			return -ENOMEM;
+
+		ih->gpu_addr = dma_addr;
+		ih->wptr_addr = dma_addr + ih->ring_size;
+		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
+		ih->rptr_addr = dma_addr + ih->ring_size + 4;
+		ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
 	} else {
-		r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs);
+		unsigned wptr_offs, rptr_offs;
+
+		r = amdgpu_device_wb_get(adev, &wptr_offs);
+		if (r)
+			return r;
+
+		r = amdgpu_device_wb_get(adev, &rptr_offs);
 		if (r) {
-			dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r);
+			amdgpu_device_wb_free(adev, wptr_offs);
 			return r;
 		}
 
-		r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs);
+		r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_GTT,
+					    &ih->ring_obj, &ih->gpu_addr,
+					    (void **)&ih->ring);
 		if (r) {
-			amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
-			dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r);
+			amdgpu_device_wb_free(adev, rptr_offs);
+			amdgpu_device_wb_free(adev, wptr_offs);
 			return r;
 		}
 
-		return amdgpu_ih_ring_alloc(adev);
+		ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
+		ih->wptr_cpu = &adev->wb.wb[wptr_offs];
+		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
+		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
 	}
+	return 0;
 }
 
 /**
  * amdgpu_ih_ring_fini - tear down the IH state
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to tear down
  *
  * Tears down the IH state and frees buffer
  * used for the IH ring buffer.
  */
-void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-	if (adev->irq.ih.use_bus_addr) {
-		if (adev->irq.ih.ring) {
-			/* add 8 bytes for the rptr/wptr shadows and
-			 * add them to the end of the ring allocation.
-			 */
-			pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
-					    (void *)adev->irq.ih.ring,
-					    adev->irq.ih.rb_dma_addr);
-			adev->irq.ih.ring = NULL;
-		}
+	if (ih->use_bus_addr) {
+		if (!ih->ring)
+			return;
+
+		/* add 8 bytes for the rptr/wptr shadows and
+		 * add them to the end of the ring allocation.
+		 */
+		dma_free_coherent(adev->dev, ih->ring_size + 8,
+				  (void *)ih->ring, ih->gpu_addr);
+		ih->ring = NULL;
 	} else {
-		amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj,
-				      &adev->irq.ih.gpu_addr,
-				      (void **)&adev->irq.ih.ring);
-		amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
-		amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs);
+		amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
+				      (void **)&ih->ring);
+		amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
+		amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
	}
 }
 
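For reference, a minimal stand-alone sketch of the shadow-pointer arithmetic introduced in the use_bus_addr path above: the wptr/rptr shadows occupy the 8 extra bytes appended to the ring, and the CPU-side pointers index the same two dwords that the bus addresses point at. All names and values below are local to the example, not code from the patch.

#include <assert.h>
#include <stdint.h>

#define RING_SIZE (256 * 1024)	/* ring size in bytes, power of two */

/* Pretend CPU mapping of the ring plus the 8 shadow bytes. */
static uint32_t ring[RING_SIZE / 4 + 2];

int main(void)
{
	uint64_t dma_addr = 0x100000;	/* pretend bus address of the allocation */

	/* GPU-visible addresses of the two shadow dwords, as set up in the patch. */
	uint64_t wptr_addr = dma_addr + RING_SIZE;
	uint64_t rptr_addr = dma_addr + RING_SIZE + 4;

	/* CPU-side views: ring[] is a dword array, so RING_SIZE / 4 is the
	 * first dword past the ring proper.
	 */
	uint32_t *wptr_cpu = &ring[RING_SIZE / 4];
	uint32_t *rptr_cpu = &ring[(RING_SIZE / 4) + 1];

	/* Both views name the same bytes of the RING_SIZE + 8 allocation. */
	assert((uintptr_t)wptr_cpu - (uintptr_t)ring == wptr_addr - dma_addr);
	assert((uintptr_t)rptr_cpu - (uintptr_t)ring == rptr_addr - dma_addr);
	return 0;
}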
@@ -142,133 +135,44 @@
  * amdgpu_ih_process - interrupt handler
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
  *
  * Interrupt hander (VI), walk the IH ring.
  * Returns irq process return code.
  */
-int amdgpu_ih_process(struct amdgpu_device *adev)
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-	struct amdgpu_iv_entry entry;
+	unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
 	u32 wptr;
 
-	if (!adev->irq.ih.enabled || adev->shutdown)
+	if (!ih->enabled || adev->shutdown)
 		return IRQ_NONE;
 
-	wptr = amdgpu_ih_get_wptr(adev);
+	wptr = amdgpu_ih_get_wptr(adev, ih);
 
 restart_ih:
 	/* is somebody else already processing irqs? */
-	if (atomic_xchg(&adev->irq.ih.lock, 1))
+	if (atomic_xchg(&ih->lock, 1))
 		return IRQ_NONE;
 
-	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, adev->irq.ih.rptr, wptr);
+	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
 
-	while (adev->irq.ih.rptr != wptr) {
-		u32 ring_index = adev->irq.ih.rptr >> 2;
-
-		/* Prescreening of high-frequency interrupts */
-		if (!amdgpu_ih_prescreen_iv(adev)) {
-			adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
-			continue;
-		}
-
-		/* Before dispatching irq to IP blocks, send it to amdkfd */
-		amdgpu_amdkfd_interrupt(adev,
-				(const void *) &adev->irq.ih.ring[ring_index]);
-
-		entry.iv_entry = (const uint32_t *)
-			&adev->irq.ih.ring[ring_index];
-		amdgpu_ih_decode_iv(adev, &entry);
-		adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
-
-		amdgpu_irq_dispatch(adev, &entry);
+	while (ih->rptr != wptr && --count) {
+		amdgpu_irq_dispatch(adev, ih);
+		ih->rptr &= ih->ptr_mask;
 	}
-	amdgpu_ih_set_rptr(adev);
-	atomic_set(&adev->irq.ih.lock, 0);
+
+	amdgpu_ih_set_rptr(adev, ih);
+	atomic_set(&ih->lock, 0);
 
 	/* make sure wptr hasn't changed while processing */
-	wptr = amdgpu_ih_get_wptr(adev);
-	if (wptr != adev->irq.ih.rptr)
+	wptr = amdgpu_ih_get_wptr(adev, ih);
+	if (wptr != ih->rptr)
 		goto restart_ih;
 
 	return IRQ_HANDLED;
 }
 
-/**
- * amdgpu_ih_add_fault - Add a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a retry page fault interrupt is
- * received. If this is a new page fault, it will be added to a hash
- * table. The return value indicates whether this is a new fault, or
- * a fault that was already known and is already being handled.
- *
- * If there are too many pending page faults, this will fail. Retry
- * interrupts should be ignored in this case until there is enough
- * free space.
- *
- * Returns 0 if the fault was added, 1 if the fault was already known,
- * -ENOSPC if there are too many pending faults.
- */
-int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
-{
-	unsigned long flags;
-	int r = -ENOSPC;
-
-	if (WARN_ON_ONCE(!adev->irq.ih.faults))
-		/* Should be allocated in <IP>_ih_sw_init on GPUs that
-		 * support retry faults and require retry filtering.
-		 */
-		return r;
-
-	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
-	/* Only let the hash table fill up to 50% for best performance */
-	if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
-		goto unlock_out;
-
-	r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
-	if (!r)
-		adev->irq.ih.faults->count++;
-
-	/* chash_table_copy_in should never fail unless we're losing count */
-	WARN_ON_ONCE(r < 0);
-
-unlock_out:
-	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
-	return r;
-}
-
-/**
- * amdgpu_ih_clear_fault - Remove a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a page fault has been handled. Any
- * future interrupt with this key will be processed as a new
- * page fault.
- */
-void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
-{
-	unsigned long flags;
-	int r;
-
-	if (!adev->irq.ih.faults)
-		return;
-
-	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
-	r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
-	if (!WARN_ON_ONCE(r < 0)) {
-		adev->irq.ih.faults->count--;
-		WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
-	}
-
-	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
-}
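
To show what the new entry points look like from a caller's side, here is a minimal sketch of an IP-specific IH block using the reworked API. The function names, the 64 KB ring size, and the use_bus_addr choice are illustrative assumptions; only the amdgpu_ih_ring_init()/amdgpu_ih_ring_fini() signatures come from the patch above.

/* Hypothetical <ip>_ih_sw_init()/sw_fini() pair; everything except the two
 * amdgpu_ih_ring_* calls is illustrative.
 */
static int example_ih_sw_init(struct amdgpu_device *adev)
{
	int r;

	/* The ring to operate on is now passed explicitly; adev->irq.ih is
	 * simply the default ring an IH block would hand in here.
	 */
	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
	if (r)
		return r;

	/* ... register IRQ sources, enable the ring, etc. ... */
	return 0;
}

static void example_ih_sw_fini(struct amdgpu_device *adev)
{
	/* Tear down exactly the ring that was initialized above. */
	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
}

The point of threading ih through init, fini and process is that a driver can now manage additional IH rings simply by passing a different struct amdgpu_ih_ring to the same helpers.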