From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1734 +++++++++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 1,314 insertions(+), 420 deletions(-)
diff --git a/kernel/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/kernel/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index de5a689..dbcaef3 100644
--- a/kernel/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/kernel/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -21,16 +21,31 @@
*
*/
+#include <linux/delay.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
-#include "sdma0/sdma0_4_0_offset.h"
-#include "sdma0/sdma0_4_0_sh_mask.h"
-#include "sdma1/sdma1_4_0_offset.h"
-#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "sdma0/sdma0_4_2_offset.h"
+#include "sdma0/sdma0_4_2_sh_mask.h"
+#include "sdma1/sdma1_4_2_offset.h"
+#include "sdma1/sdma1_4_2_sh_mask.h"
+#include "sdma2/sdma2_4_2_2_offset.h"
+#include "sdma2/sdma2_4_2_2_sh_mask.h"
+#include "sdma3/sdma3_4_2_2_offset.h"
+#include "sdma3/sdma3_4_2_2_sh_mask.h"
+#include "sdma4/sdma4_4_2_2_offset.h"
+#include "sdma4/sdma4_4_2_2_sh_mask.h"
+#include "sdma5/sdma5_4_2_2_offset.h"
+#include "sdma5/sdma5_4_2_2_sh_mask.h"
+#include "sdma6/sdma6_4_2_2_offset.h"
+#include "sdma6/sdma6_4_2_2_sh_mask.h"
+#include "sdma7/sdma7_4_2_2_offset.h"
+#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "hdp/hdp_4_0_offset.h"
#include "sdma0/sdma0_4_1_default.h"
@@ -41,6 +56,8 @@
#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+#include "amdgpu_ras.h"
+
MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
@@ -48,14 +65,25 @@
MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
+MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
+MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
+MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
+
+#define WREG32_SDMA(instance, offset, value) \
+ WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
+#define RREG32_SDMA(instance, offset) \
+ RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
static const struct soc15_reg_golden golden_settings_sdma_4[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
@@ -88,21 +116,24 @@
static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
};
static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
};
-static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
-{
+static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
@@ -112,30 +143,73 @@
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
-static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
+static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
{
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
};
static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
@@ -144,11 +218,234 @@
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
};
+static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_arct[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_UTCL1_TIMEOUT, 0xffffffff, 0x00010001)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
+};
+
+static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
+ { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+ 0, 0,
+ },
+ { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
+};
+
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset)
{
- return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) :
- (adev->reg_offset[SDMA1_HWIP][0][0] + offset));
+ switch (instance) {
+ case 0:
+ return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
+ case 1:
+ return (adev->reg_offset[SDMA1_HWIP][0][0] + offset);
+ case 2:
+ return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
+ case 3:
+ return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
+ case 4:
+ return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
+ case 5:
+ return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
+ case 6:
+ return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
+ case 7:
+ return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
+ default:
+ break;
+ }
+ return 0;
+}
+
+static unsigned sdma_v4_0_seq_to_irq_id(int seq_num)
+{
+ switch (seq_num) {
+ case 0:
+ return SOC15_IH_CLIENTID_SDMA0;
+ case 1:
+ return SOC15_IH_CLIENTID_SDMA1;
+ case 2:
+ return SOC15_IH_CLIENTID_SDMA2;
+ case 3:
+ return SOC15_IH_CLIENTID_SDMA3;
+ case 4:
+ return SOC15_IH_CLIENTID_SDMA4;
+ case 5:
+ return SOC15_IH_CLIENTID_SDMA5;
+ case 6:
+ return SOC15_IH_CLIENTID_SDMA6;
+ case 7:
+ return SOC15_IH_CLIENTID_SDMA7;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
+{
+ switch (client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ return 0;
+ case SOC15_IH_CLIENTID_SDMA1:
+ return 1;
+ case SOC15_IH_CLIENTID_SDMA2:
+ return 2;
+ case SOC15_IH_CLIENTID_SDMA3:
+ return 3;
+ case SOC15_IH_CLIENTID_SDMA4:
+ return 4;
+ case SOC15_IH_CLIENTID_SDMA5:
+ return 5;
+ case SOC15_IH_CLIENTID_SDMA6:
+ return 6;
+ case SOC15_IH_CLIENTID_SDMA7:
+ return 7;
+ default:
+ break;
+ }
+ return -EINVAL;
}
static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
@@ -156,11 +453,11 @@
switch (adev->asic_type) {
case CHIP_VEGA10:
soc15_program_register_sequence(adev,
- golden_settings_sdma_4,
- ARRAY_SIZE(golden_settings_sdma_4));
+ golden_settings_sdma_4,
+ ARRAY_SIZE(golden_settings_sdma_4));
soc15_program_register_sequence(adev,
- golden_settings_sdma_vg10,
- ARRAY_SIZE(golden_settings_sdma_vg10));
+ golden_settings_sdma_vg10,
+ ARRAY_SIZE(golden_settings_sdma_vg10));
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@@ -172,20 +469,108 @@
break;
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
- golden_settings_sdma_4_2,
- ARRAY_SIZE(golden_settings_sdma_4_2));
+ golden_settings_sdma0_4_2_init,
+ ARRAY_SIZE(golden_settings_sdma0_4_2_init));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma0_4_2,
+ ARRAY_SIZE(golden_settings_sdma0_4_2));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma1_4_2,
+ ARRAY_SIZE(golden_settings_sdma1_4_2));
+ break;
+ case CHIP_ARCTURUS:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_arct,
+ ARRAY_SIZE(golden_settings_sdma_arct));
break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
- golden_settings_sdma_4_1,
- ARRAY_SIZE(golden_settings_sdma_4_1));
+ golden_settings_sdma_4_1,
+ ARRAY_SIZE(golden_settings_sdma_4_1));
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_rv2,
+ ARRAY_SIZE(golden_settings_sdma_rv2));
+ else
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_rv1,
+ ARRAY_SIZE(golden_settings_sdma_rv1));
+ break;
+ case CHIP_RENOIR:
soc15_program_register_sequence(adev,
- golden_settings_sdma_rv1,
- ARRAY_SIZE(golden_settings_sdma_rv1));
+ golden_settings_sdma_4_3,
+ ARRAY_SIZE(golden_settings_sdma_4_3));
break;
default:
break;
}
+}
+
+static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
+{
+ int i;
+
+ /*
+ * The only chips with SDMAv4 and ULV are VG10 and VG20.
+ * Server SKUs take a different hysteresis setting from other SKUs.
+ */
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ if (adev->pdev->device == 0x6860)
+ break;
+ return;
+ case CHIP_VEGA20:
+ if (adev->pdev->device == 0x66a1)
+ break;
+ return;
+ default:
+ return;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ uint32_t temp;
+
+ temp = RREG32_SDMA(i, mmSDMA0_ULV_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA0_ULV_CNTL, HYSTERESIS, 0x0);
+ WREG32_SDMA(i, mmSDMA0_ULV_CNTL, temp);
+ }
+}
+
+static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ int err = 0;
+ const struct sdma_firmware_header_v1_0 *hdr;
+
+ err = amdgpu_ucode_validate(sdma_inst->fw);
+ if (err)
+ return err;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ release_firmware(adev->sdma.instance[i].fw);
+ adev->sdma.instance[i].fw = NULL;
+
+ /* arcturus shares the same FW memory across
+ all SDMA isntances */
+ if (adev->asic_type == CHIP_ARCTURUS)
+ break;
+ }
+
+ memset((void*)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
}
/**
@@ -207,7 +592,9 @@
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
- const struct sdma_firmware_header_v1_0 *hdr;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
DRM_DEBUG("\n");
@@ -222,32 +609,62 @@
chip_name = "vega20";
break;
case CHIP_RAVEN:
- chip_name = "raven";
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ chip_name = "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
+ break;
+ case CHIP_ARCTURUS:
+ chip_name = "arcturus";
+ break;
+ case CHIP_RENOIR:
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ chip_name = "renoir";
+ else
+ chip_name = "green_sardine";
break;
default:
BUG();
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
- else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
- if (err)
- goto out;
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
- adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
- if (adev->sdma.instance[i].feature_version >= 20)
- adev->sdma.instance[i].burst_nop = true;
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
+ if (err)
+ goto out;
+
+ for (i = 1; i < adev->sdma.num_instances; i++) {
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* Acturus will leverage the same FW memory
+ for every SDMA instance */
+ memcpy((void*)&adev->sdma.instance[i],
+ (void*)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ }
+ else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
+
+ err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
+ if (err)
+ goto out;
+ }
+ }
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
info->fw = adev->sdma.instance[i].fw;
@@ -256,13 +673,11 @@
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
}
}
+
out:
if (err) {
DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
+ sdma_v4_0_destroy_inst_ctx(adev);
}
return err;
}
@@ -302,23 +717,18 @@
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
- u32 lowbit, highbit;
-
- lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
- highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
-
- DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
- ring->me, highbit, lowbit);
- wptr = highbit;
+ wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
wptr = wptr << 32;
- wptr |= lowbit;
+ wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
+ DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
+ ring->me, wptr);
}
return wptr >> 2;
}
/**
- * sdma_v4_0_ring_set_wptr - commit the write pointer
+ * sdma_v4_0_page_ring_set_wptr - commit the write pointer
*
* @ring: amdgpu ring pointer
*
@@ -352,14 +762,67 @@
lower_32_bits(ring->wptr << 2),
ring->me,
upper_32_bits(ring->wptr << 2));
- WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+/**
+ * sdma_v4_0_page_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (VEGA10+).
+ */
+static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ } else {
+ wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
+ wptr = wptr << 32;
+ wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v4_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (VEGA10+).
+ */
+static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ uint64_t wptr = ring->wptr << 2;
+
+ WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
+ lower_32_bits(wptr));
+ WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
+ upper_32_bits(wptr));
}
}
static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@@ -379,11 +842,14 @@
* Schedule an IB in the DMA ring (VEGA10).
*/
static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_ib *ib,
- unsigned vmid, bool ctx_switch)
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
/* IB packet must end on a 8 DW boundary */
- sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -432,16 +898,13 @@
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask = 0;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- if (ring->me == 0)
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
- else
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
sdma_v4_0_wait_reg_mem(ring, 0, 1,
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
ref_and_mask, ref_and_mask, 10);
}
@@ -493,26 +956,25 @@
*/
static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
- int i;
-
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ int i, unset = 0;
for (i = 0; i < adev->sdma.num_instances; i++) {
- rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- }
+ sdma[i] = &adev->sdma.instance[i].ring;
- sdma0->ready = false;
- sdma1->ready = false;
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ unset = 1;
+ }
+
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
+ }
}
/**
@@ -528,7 +990,41 @@
}
/**
- * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
+ * sdma_v4_0_page_stop - stop the page async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the page async dma ring buffers (VEGA10).
+ */
+static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ u32 rb_cntl, ib_cntl;
+ int i;
+ bool unset = false;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma[i] = &adev->sdma.instance[i].page;
+
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
+ (!unset)) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ unset = true;
+ }
+
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
+ RB_ENABLE, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
+ IB_ENABLE, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
+ }
+}
+
+/**
+ * sdma_v4_0_ctx_switch_enable - stop the async dma engines context switch
*
* @adev: amdgpu_device pointer
* @enable: enable/disable the DMA MEs context switch.
@@ -565,18 +1061,24 @@
}
for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, enable ? 1 : 0);
if (enable && amdgpu_sdma_phase_quantum) {
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
- phase_quantum);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
- phase_quantum);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
- phase_quantum);
+ WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
}
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
+
+ /*
+ * Enable SDMA utilization. Its only supported on
+ * Arcturus for the moment and firmware version 14
+ * and above.
+ */
+ if (adev->asic_type == CHIP_ARCTURUS &&
+ adev->sdma.instance[i].fw_version >= 14)
+ WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
}
}
@@ -594,159 +1096,216 @@
u32 f32_cntl;
int i;
- if (enable == false) {
+ if (!enable) {
sdma_v4_0_gfx_stop(adev);
sdma_v4_0_rlc_stop(adev);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_0_page_stop(adev);
}
for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
}
+}
+
+/**
+ * sdma_v4_0_rb_cntl - get parameters for rb_cntl
+ */
+static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
+{
+ /* Set ring buffer size in dwords */
+ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ return rb_cntl;
}
/**
* sdma_v4_0_gfx_resume - setup and start the async dma engines
*
* @adev: amdgpu_device pointer
+ * @i: instance to resume
*
* Set up the gfx DMA ring buffers and enable them (VEGA10).
* Returns 0 for success, error for failure.
*/
-static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
- struct amdgpu_ring *ring;
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
- u32 rb_bufsz;
u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
- u32 temp;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
+ wb_offset = (ring->rptr_offs * 4);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
+ rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
-#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
-#endif
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
- /* Initialize the ring buffer's read and write pointers */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
- /* set the wb address whether it's enabled or not */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+ ring->wptr = 0;
- ring->wptr = 0;
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
-
- doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
-
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA0_GFX_DOORBELL_OFFSET,
OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index);
+ WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
- if (amdgpu_sriov_vf(adev))
- sdma_v4_0_ring_set_wptr(ring);
+ sdma_v4_0_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
- /* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- if (amdgpu_sriov_vf(adev))
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
- else
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
-
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
- ring->ready = true;
+ ring->sched.ready = true;
+}
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v4_0_ctx_switch_enable(adev, true);
- sdma_v4_0_enable(adev, true);
- }
+/**
+ * sdma_v4_0_page_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ *
+ * Set up the page DMA ring buffers and enable them (VEGA10).
+ * Returns 0 for success, error for failure.
+ */
+static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
- return r;
- }
+ wb_offset = (ring->rptr_offs * 4);
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
+ rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
- }
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
- return 0;
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
+
+ doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA0_PAGE_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
+ WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
+
+ /* paging queue doorbell range is setup at sdma_v4_0_gfx_resume */
+ sdma_v4_0_page_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_PAGE_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
+
+ ring->sched.ready = true;
}
static void
@@ -755,7 +1314,7 @@
uint32_t def, data;
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
- /* disable idle interrupt */
+ /* enable idle interrupt */
def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
@@ -804,6 +1363,7 @@
switch (adev->asic_type) {
case CHIP_RAVEN:
+ case CHIP_RENOIR:
sdma_v4_1_init_power_gating(adev);
sdma_v4_1_update_power_gating(adev, true);
break;
@@ -857,12 +1417,14 @@
(adev->sdma.instance[i].fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
+ WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
for (j = 0; j < fw_size; j++)
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
+ le32_to_cpup(fw_data++));
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
+ WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
+ adev->sdma.instance[i].fw_version);
}
return 0;
@@ -878,33 +1440,78 @@
*/
static int sdma_v4_0_start(struct amdgpu_device *adev)
{
- int r = 0;
+ struct amdgpu_ring *ring;
+ int i, r = 0;
if (amdgpu_sriov_vf(adev)) {
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
+ } else {
- /* set RB registers */
- r = sdma_v4_0_gfx_resume(adev);
- return r;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ r = sdma_v4_0_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ /* unhalt the MEs */
+ sdma_v4_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v4_0_ctx_switch_enable(adev, true);
}
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- r = sdma_v4_0_load_microcode(adev);
+ /* start the gfx rings and rlc compute queues */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ uint32_t temp;
+
+ WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+ sdma_v4_0_gfx_resume(adev, i);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_0_page_resume(adev, i);
+
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32_SDMA(i, mmSDMA0_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+ WREG32_SDMA(i, mmSDMA0_CNTL, temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_0_ctx_switch_enable(adev, true);
+ sdma_v4_0_enable(adev, true);
+ } else {
+ r = sdma_v4_0_rlc_resume(adev);
if (r)
return r;
}
- /* unhalt the MEs */
- sdma_v4_0_enable(adev, true);
- /* enable sdma ring preemption */
- sdma_v4_0_ctx_switch_enable(adev, true);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
- /* start the gfx rings and rlc compute queues */
- r = sdma_v4_0_gfx_resume(adev);
- if (r)
- return r;
- r = sdma_v4_0_rlc_resume(adev);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+ r = amdgpu_ring_test_helper(page);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == page)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
return r;
}
@@ -928,21 +1535,16 @@
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ if (r)
return r;
- }
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
- if (r) {
- DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- amdgpu_device_wb_free(adev, index);
- return r;
- }
+ if (r)
+ goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -956,18 +1558,14 @@
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
- if (i < adev->usec_timeout) {
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
- } else {
- DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
- ring->idx, tmp);
- r = -EINVAL;
- }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
amdgpu_device_wb_free(adev, index);
-
return r;
}
@@ -990,20 +1588,17 @@
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ if (r)
return r;
- }
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
goto err0;
- }
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -1022,21 +1617,17 @@
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
- if (tmp == 0xDEADBEEF) {
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ if (tmp == 0xDEADBEEF)
r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+ else
r = -EINVAL;
- }
+
err1:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
@@ -1141,11 +1732,11 @@
*/
static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1207,23 +1798,77 @@
sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
}
+static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
+{
+ uint fw_version = adev->sdma.instance[0].fw_version;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return fw_version >= 430;
+ case CHIP_VEGA12:
+ /*return fw_version >= 31;*/
+ return false;
+ case CHIP_VEGA20:
+ return fw_version >= 123;
+ default:
+ return false;
+ }
+}
+
static int sdma_v4_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
- if (adev->asic_type == CHIP_RAVEN)
+ if (adev->flags & AMD_IS_APU)
adev->sdma.num_instances = 1;
+ else if (adev->asic_type == CHIP_ARCTURUS)
+ adev->sdma.num_instances = 8;
else
adev->sdma.num_instances = 2;
+
+ r = sdma_v4_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ /* TODO: Page queue breaks driver reload under SRIOV */
+ if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev)))
+ adev->sdma.has_page_queue = false;
+ else if (sdma_v4_0_fw_support_paging_queue(adev))
+ adev->sdma.has_page_queue = true;
sdma_v4_0_set_ring_funcs(adev);
sdma_v4_0_set_buffer_funcs(adev);
sdma_v4_0_set_vm_pte_funcs(adev);
sdma_v4_0_set_irq_funcs(adev);
+ sdma_v4_0_set_ras_funcs(adev);
return 0;
}
+static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+
+static int sdma_v4_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct ras_ih_if ih_info = {
+ .cb = sdma_v4_0_process_ras_data_cb,
+ };
+
+ sdma_v4_0_setup_ulv(adev);
+
+ if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
+ adev->sdma.funcs->reset_ras_error_count(adev);
+
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
+ return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+ else
+ return 0;
+}
static int sdma_v4_0_sw_init(void *handle)
{
@@ -1232,21 +1877,21 @@
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,
- &adev->sdma.trap_irq);
- if (r)
- return r;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+ }
- /* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,
- &adev->sdma.trap_irq);
- if (r)
- return r;
-
- r = sdma_v4_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
+ /* SDMA SRAM ECC event */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
+ &adev->sdma.ecc_irq);
+ if (r)
+ return r;
}
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1254,21 +1899,38 @@
ring->ring_obj = NULL;
ring->use_doorbell = true;
- DRM_INFO("use_doorbell being set to: [%s]\n",
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
- ring->doorbell_index = (i == 0) ?
- (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset
- : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset
+ /* doorbell size is 2 dwords, get DWORD offset */
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
sprintf(ring->name, "sdma%d", i);
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->sdma.trap_irq,
- (i == 0) ?
- AMDGPU_SDMA_IRQ_TRAP0 :
- AMDGPU_SDMA_IRQ_TRAP1);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
+
+ if (adev->sdma.has_page_queue) {
+ ring = &adev->sdma.instance[i].page;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ /* paging queue use same doorbell index/routing as gfx queue
+ * with 0x400 (4096 dwords) offset on second doorbell page
+ */
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+ ring->doorbell_index += 0x400;
+
+ sprintf(ring->name, "page%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT);
+ if (r)
+ return r;
+ }
}
return r;
@@ -1279,13 +1941,16 @@
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
- amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
+ adev->sdma.funcs->ras_fini(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ if (adev->sdma.has_page_queue)
+ amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
+
+ sdma_v4_0_destroy_inst_ctx(adev);
return 0;
}
@@ -1295,7 +1960,11 @@
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- sdma_v4_0_init_golden_registers(adev);
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_0_init_golden_registers(adev);
r = sdma_v4_0_start(adev);
@@ -1305,12 +1974,23 @@
static int sdma_v4_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
if (amdgpu_sriov_vf(adev))
return 0;
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
+
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
return 0;
}
@@ -1335,7 +2015,7 @@
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
+ u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
return false;
@@ -1346,15 +2026,17 @@
static int sdma_v4_0_wait_for_idle(void *handle)
{
- unsigned i;
- u32 sdma0, sdma1;
+ unsigned i, j;
+ u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->usec_timeout; i++) {
- sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
- sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
-
- if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
+ for (j = 0; j < adev->sdma.num_instances; j++) {
+ sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG);
+ if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK))
+ break;
+ }
+ if (j == adev->sdma.num_instances)
return 0;
udelay(1);
}
@@ -1375,14 +2057,10 @@
{
u32 sdma_cntl;
- u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
- sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
- sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
-
- sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = RREG32_SDMA(type, mmSDMA0_CNTL);
sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
- WREG32(reg_offset, sdma_cntl);
+ WREG32_SDMA(type, mmSDMA0_CNTL, sdma_cntl);
return 0;
}
@@ -1391,114 +2069,121 @@
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ uint32_t instance;
+
DRM_DEBUG("IH: SDMA trap\n");
- switch (entry->client_id) {
- case SOC15_IH_CLIENTID_SDMA0:
- switch (entry->ring_id) {
- case 0:
- amdgpu_fence_process(&adev->sdma.instance[0].ring);
- break;
- case 1:
- /* XXX compute */
- break;
- case 2:
- /* XXX compute */
- break;
- case 3:
- /* XXX page queue*/
- break;
- }
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[instance].ring);
break;
- case SOC15_IH_CLIENTID_SDMA1:
- switch (entry->ring_id) {
- case 0:
- amdgpu_fence_process(&adev->sdma.instance[1].ring);
- break;
- case 1:
- /* XXX compute */
- break;
- case 2:
- /* XXX compute */
- break;
- case 3:
- /* XXX page queue*/
- break;
- }
+ case 1:
+ if (adev->asic_type == CHIP_VEGA20)
+ amdgpu_fence_process(&adev->sdma.instance[instance].page);
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ if (adev->asic_type != CHIP_VEGA20)
+ amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
}
return 0;
+}
+
+static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ /* When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ goto out;
+
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ goto out;
+
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
+
+out:
+ return AMDGPU_RAS_SUCCESS;
}
static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ int instance;
+
DRM_ERROR("Illegal instruction in SDMA command stream\n");
- schedule_work(&adev->reset_work);
+
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ return 0;
+
+ switch (entry->ring_id) {
+ case 0:
+ drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
+ break;
+ }
return 0;
}
+static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_edc_config;
+
+ sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);
+ sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);
+
+ return 0;
+}
static void sdma_v4_0_update_medium_grain_clock_gating(
struct amdgpu_device *adev,
bool enable)
{
uint32_t data, def;
+ int i;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
- /* enable sdma0 clock gating */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
- data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
-
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
- data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
}
} else {
- /* disable sdma0 clock gating */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
- data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
-
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
-
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
- data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
}
}
}
@@ -1509,34 +2194,23 @@
bool enable)
{
uint32_t data, def;
+ int i;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
- /* 1-not override: enable sdma0 mem light sleep */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
- data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
-
- /* 1-not override: enable sdma1 mem light sleep */
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
- data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* 1-not override: enable sdma mem light sleep */
+ def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL);
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
+ WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);
}
} else {
- /* 0-override:disable sdma0 mem light sleep */
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
- data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
-
- /* 0-override:disable sdma1 mem light sleep */
- if (adev->sdma.num_instances > 1) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
- data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* 0-override:disable sdma mem light sleep */
+ def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL);
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
+ WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);
}
}
}
@@ -1554,10 +2228,12 @@
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
sdma_v4_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
sdma_v4_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -1572,6 +2248,7 @@
switch (adev->asic_type) {
case CHIP_RAVEN:
+ case CHIP_RENOIR:
sdma_v4_1_update_power_gating(adev,
state == AMD_PG_STATE_GATE ? true : false);
break;
@@ -1604,7 +2281,7 @@
const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.name = "sdma_v4_0",
.early_init = sdma_v4_0_early_init,
- .late_init = NULL,
+ .late_init = sdma_v4_0_late_init,
.sw_init = sdma_v4_0_sw_init,
.sw_fini = sdma_v4_0_sw_fini,
.hw_init = sdma_v4_0_hw_init,
@@ -1624,10 +2301,110 @@
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_MMHUB,
+ .vmhub = AMDGPU_MMHUB_0,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_ring_get_wptr,
.set_wptr = sdma_v4_0_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+ .emit_ib = sdma_v4_0_ring_emit_ib,
+ .emit_fence = sdma_v4_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_0_ring_test_ring,
+ .test_ib = sdma_v4_0_ring_test_ib,
+ .insert_nop = sdma_v4_0_ring_insert_nop,
+ .pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/*
+ * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1).
+ * So create a individual constant ring_funcs for those instances.
+ */
+static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_MMHUB_1,
+ .get_rptr = sdma_v4_0_ring_get_rptr,
+ .get_wptr = sdma_v4_0_ring_get_wptr,
+ .set_wptr = sdma_v4_0_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+ .emit_ib = sdma_v4_0_ring_emit_ib,
+ .emit_fence = sdma_v4_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_0_ring_test_ring,
+ .test_ib = sdma_v4_0_ring_test_ib,
+ .insert_nop = sdma_v4_0_ring_insert_nop,
+ .pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = sdma_v4_0_ring_get_rptr,
+ .get_wptr = sdma_v4_0_page_ring_get_wptr,
+ .set_wptr = sdma_v4_0_page_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+ .emit_ib = sdma_v4_0_ring_emit_ib,
+ .emit_fence = sdma_v4_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_0_ring_test_ring,
+ .test_ib = sdma_v4_0_ring_test_ib,
+ .insert_nop = sdma_v4_0_ring_insert_nop,
+ .pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_MMHUB_1,
+ .get_rptr = sdma_v4_0_ring_get_rptr,
+ .get_wptr = sdma_v4_0_page_ring_get_wptr,
+ .set_wptr = sdma_v4_0_page_ring_set_wptr,
.emit_frame_size =
6 + /* sdma_v4_0_ring_emit_hdp_flush */
3 + /* hdp invalidate */
@@ -1656,8 +2433,22 @@
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
+ adev->sdma.instance[i].ring.funcs =
+ &sdma_v4_0_ring_funcs_2nd_mmhub;
+ else
+ adev->sdma.instance[i].ring.funcs =
+ &sdma_v4_0_ring_funcs;
adev->sdma.instance[i].ring.me = i;
+ if (adev->sdma.has_page_queue) {
+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_0_page_ring_funcs_2nd_mmhub;
+ else
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_0_page_ring_funcs;
+ adev->sdma.instance[i].page.me = i;
+ }
}
}
@@ -1670,11 +2461,33 @@
.process = sdma_v4_0_process_illegal_inst_irq,
};
+static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
+ .set = sdma_v4_0_set_ecc_irq_state,
+ .process = amdgpu_sdma_process_ecc_irq,
+};
+
+
+
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ switch (adev->sdma.num_instances) {
+ case 1:
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
+ break;
+ case 8:
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ break;
+ case 2:
+ default:
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
+ break;
+ }
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
+ adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
}
/**
@@ -1692,10 +2505,12 @@
static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
- uint32_t byte_count)
+ uint32_t byte_count,
+ bool tmz)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
@@ -1738,10 +2553,11 @@
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
{
- if (adev->mman.buffer_funcs == NULL) {
- adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
+ adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
+ if (adev->sdma.has_page_queue)
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
+ else
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
- }
}
static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
@@ -1754,15 +2570,93 @@
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ struct drm_gpu_scheduler *sched;
unsigned i;
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->vm_manager.vm_pte_rings[i] =
- &adev->sdma.instance[i].ring;
+ adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue)
+ sched = &adev->sdma.instance[i].page.sched;
+ else
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_scheds[i] = sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
- adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+static void sdma_v4_0_get_ras_error_count(uint32_t value,
+ uint32_t instance,
+ uint32_t *sec_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt;
+
+ /* double bits error (multiple bits) error detection is not supported */
+ for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
+ /* the SDMA_EDC_COUNTER register in each sdma instance
+ * shares the same sed shift_mask
+ * */
+ sec_cnt = (value &
+ sdma_v4_0_ras_fields[i].sec_count_mask) >>
+ sdma_v4_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("Detected %s in SDMA%d, SED %d\n",
+ sdma_v4_0_ras_fields[i].name,
+ instance, sec_cnt);
+ *sec_count += sec_cnt;
+ }
+ }
+}
+
+static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t instance, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0;
+ uint32_t reg_value = 0;
+
+ reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_0_get_ras_error_count(reg_value,
+ instance, &sec_count);
+ /* err_data->ce_count should be initialized to 0
+ * before calling into this function */
+ err_data->ce_count += sec_count;
+ /* double bit error is not supported
+ * set ue count to 0 */
+ err_data->ue_count = 0;
+
+ return 0;
+};
+
+static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+
+ /* read back edc counter registers to clear the counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
+ }
+}
+
+static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
+ .ras_late_init = amdgpu_sdma_ras_late_init,
+ .ras_fini = amdgpu_sdma_ras_fini,
+ .query_ras_error_count = sdma_v4_0_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
+};
+
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+ break;
+ default:
+ break;
}
}
--
Gitblit v1.6.2