| .. | .. |
|---|
| 20 | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
|---|
| 21 | 21 | * |
|---|
| 22 | 22 | */ |
|---|
| 23 | + |
|---|
| 23 | 24 | #include <linux/firmware.h> |
|---|
| 25 | +#include <linux/pci.h> |
|---|
| 26 | + |
|---|
| 24 | 27 | #include <drm/drm_cache.h> |
|---|
| 28 | + |
|---|
| 25 | 29 | #include "amdgpu.h" |
|---|
| 26 | 30 | #include "gmc_v9_0.h" |
|---|
| 27 | 31 | #include "amdgpu_atomfirmware.h" |
|---|
| 32 | +#include "amdgpu_gem.h" |
|---|
| 28 | 33 | |
|---|
| 29 | 34 | #include "hdp/hdp_4_0_offset.h" |
|---|
| 30 | 35 | #include "hdp/hdp_4_0_sh_mask.h" |
|---|
| .. | .. |
|---|
| 33 | 38 | #include "dce/dce_12_0_sh_mask.h" |
|---|
| 34 | 39 | #include "vega10_enum.h" |
|---|
| 35 | 40 | #include "mmhub/mmhub_1_0_offset.h" |
|---|
| 41 | +#include "athub/athub_1_0_sh_mask.h" |
|---|
| 36 | 42 | #include "athub/athub_1_0_offset.h" |
|---|
| 37 | 43 | #include "oss/osssys_4_0_offset.h" |
|---|
| 38 | 44 | |
|---|
| 39 | 45 | #include "soc15.h" |
|---|
| 46 | +#include "soc15d.h" |
|---|
| 40 | 47 | #include "soc15_common.h" |
|---|
| 41 | 48 | #include "umc/umc_6_0_sh_mask.h" |
|---|
| 42 | 49 | |
|---|
| 43 | 50 | #include "gfxhub_v1_0.h" |
|---|
| 44 | 51 | #include "mmhub_v1_0.h" |
|---|
| 52 | +#include "athub_v1_0.h" |
|---|
| 53 | +#include "gfxhub_v1_1.h" |
|---|
| 54 | +#include "mmhub_v9_4.h" |
|---|
| 55 | +#include "umc_v6_1.h" |
|---|
| 56 | +#include "umc_v6_0.h" |
|---|
| 45 | 57 | |
|---|
| 46 | 58 | #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" |
|---|
| 59 | + |
|---|
| 60 | +#include "amdgpu_ras.h" |
|---|
| 61 | +#include "amdgpu_xgmi.h" |
|---|
| 47 | 62 | |
|---|
| 48 | 63 | /* add these here since we already include dce12 headers and these are for DCN */ |
|---|
| 49 | 64 | #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d |
|---|
| .. | .. |
|---|
| 52 | 67 | #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10 |
|---|
| 53 | 68 | #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL |
|---|
| 54 | 69 | #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L |
|---|
| 70 | +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d |
|---|
| 71 | +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2 |
|---|
| 55 | 72 | |
|---|
| 56 | | -/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ |
|---|
| 57 | | -#define AMDGPU_NUM_OF_VMIDS 8 |
|---|
| 73 | + |
|---|
/*
 * Names of the GFX hub UTCL2 clients, indexed by the CID field read from
 * VM_L2_PROTECTION_FAULT_STATUS. Callers must bound the index with
 * ARRAY_SIZE() before use.
 *
 * Both the strings and the pointer table are const so the whole table
 * is read-only.
 */
static const char * const gfxhub_client_ids[] = {
	"CB",
	"DB",
	"IA",
	"WD",
	"CPF",
	"CPC",
	"CPG",
	"RLC",
	"TCP",
	"SQC (inst)",
	"SQC (data)",
	"SQG",
	"PA",
};
|---|
| 89 | + |
|---|
/*
 * MM hub UTCL2 client names for Raven, looked up as [cid][rw] with the
 * CID and RW fields of VM_L2_PROTECTION_FAULT_STATUS.
 * NOTE(review): rw == 0 presumably means a read access and rw == 1 a
 * write access - confirm against the register specification.
 *
 * Slots that are not listed are NULL; the fault handler prints "unknown"
 * for them. The pointer table itself is const so it is read-only.
 */
static const char * const mmhub_client_ids_raven[][2] = {
	[0][0] = "MP1",
	[1][0] = "MP0",
	[2][0] = "VCN",
	[3][0] = "VCNU",
	[4][0] = "HDP",
	[5][0] = "DCE",
	[13][0] = "UTCL2",
	[19][0] = "TLS",
	[26][0] = "OSS",
	[27][0] = "SDMA0",
	[0][1] = "MP1",
	[1][1] = "MP0",
	[2][1] = "VCN",
	[3][1] = "VCNU",
	[4][1] = "HDP",
	[5][1] = "XDP",
	[6][1] = "DBGU0",
	[7][1] = "DCE",
	[8][1] = "DCEDWB0",
	[9][1] = "DCEDWB1",
	[26][1] = "OSS",
	[27][1] = "SDMA0",
};
|---|
| 114 | + |
|---|
/*
 * MM hub UTCL2 client names for Renoir, looked up as [cid][rw] with the
 * CID and RW fields of VM_L2_PROTECTION_FAULT_STATUS.
 * NOTE(review): rw == 0 presumably means a read access and rw == 1 a
 * write access - confirm against the register specification.
 *
 * Slots that are not listed are NULL; the fault handler prints "unknown"
 * for them. The pointer table itself is const so it is read-only.
 */
static const char * const mmhub_client_ids_renoir[][2] = {
	[0][0] = "MP1",
	[1][0] = "MP0",
	[2][0] = "HDP",
	[4][0] = "DCEDMC",
	[5][0] = "DCEVGA",
	[13][0] = "UTCL2",
	[19][0] = "TLS",
	[26][0] = "OSS",
	[27][0] = "SDMA0",
	[28][0] = "VCN",
	[29][0] = "VCNU",
	[30][0] = "JPEG",
	[0][1] = "MP1",
	[1][1] = "MP0",
	[2][1] = "HDP",
	[3][1] = "XDP",
	[6][1] = "DBGU0",
	[7][1] = "DCEDMC",
	[8][1] = "DCEVGA",
	[9][1] = "DCEDWB",
	[26][1] = "OSS",
	[27][1] = "SDMA0",
	[28][1] = "VCN",
	[29][1] = "VCNU",
	[30][1] = "JPEG",
};
|---|
| 142 | + |
|---|
/*
 * MM hub UTCL2 client names for Vega10, looked up as [cid][rw] with the
 * CID and RW fields of VM_L2_PROTECTION_FAULT_STATUS.
 * NOTE(review): rw == 0 presumably means a read access and rw == 1 a
 * write access - confirm against the register specification.
 *
 * Slots that are not listed are NULL; the fault handler prints "unknown"
 * for them. The pointer table itself is const so it is read-only.
 */
static const char * const mmhub_client_ids_vega10[][2] = {
	[0][0] = "MP0",
	[1][0] = "UVD",
	[2][0] = "UVDU",
	[3][0] = "HDP",
	[13][0] = "UTCL2",
	[14][0] = "OSS",
	[15][0] = "SDMA1",
	[32+0][0] = "VCE0",
	[32+1][0] = "VCE0U",
	[32+2][0] = "XDMA",
	[32+3][0] = "DCE",
	[32+4][0] = "MP1",
	[32+14][0] = "SDMA0",
	[0][1] = "MP0",
	[1][1] = "UVD",
	[2][1] = "UVDU",
	[3][1] = "DBGU0",
	[4][1] = "HDP",
	[5][1] = "XDP",
	[14][1] = "OSS",
	[15][1] = "SDMA0",
	[32+0][1] = "VCE0",
	[32+1][1] = "VCE0U",
	[32+2][1] = "XDMA",
	[32+3][1] = "DCE",
	[32+4][1] = "DCEDWB",
	[32+5][1] = "MP1",
	[32+6][1] = "DBGU1",
	[32+14][1] = "SDMA1",
};
|---|
| 174 | + |
|---|
/*
 * MM hub UTCL2 client names for Vega12, looked up as [cid][rw] with the
 * CID and RW fields of VM_L2_PROTECTION_FAULT_STATUS.
 * NOTE(review): rw == 0 presumably means a read access and rw == 1 a
 * write access - confirm against the register specification.
 *
 * Slots that are not listed are NULL; the fault handler prints "unknown"
 * for them. The pointer table itself is const so it is read-only.
 */
static const char * const mmhub_client_ids_vega12[][2] = {
	[0][0] = "MP0",
	[1][0] = "VCE0",
	[2][0] = "VCE0U",
	[3][0] = "HDP",
	[13][0] = "UTCL2",
	[14][0] = "OSS",
	[15][0] = "SDMA1",
	[32+0][0] = "DCE",
	[32+1][0] = "XDMA",
	[32+2][0] = "UVD",
	[32+3][0] = "UVDU",
	[32+4][0] = "MP1",
	[32+15][0] = "SDMA0",
	[0][1] = "MP0",
	[1][1] = "VCE0",
	[2][1] = "VCE0U",
	[3][1] = "DBGU0",
	[4][1] = "HDP",
	[5][1] = "XDP",
	[14][1] = "OSS",
	[15][1] = "SDMA0",
	[32+0][1] = "DCE",
	[32+1][1] = "DCEDWB",
	[32+2][1] = "XDMA",
	[32+3][1] = "UVD",
	[32+4][1] = "UVDU",
	[32+5][1] = "MP1",
	[32+6][1] = "DBGU1",
	[32+15][1] = "SDMA1",
};
|---|
| 206 | + |
|---|
/*
 * MM hub UTCL2 client names for Vega20, looked up as [cid][rw] with the
 * CID and RW fields of VM_L2_PROTECTION_FAULT_STATUS.
 * NOTE(review): rw == 0 presumably means a read access and rw == 1 a
 * write access - confirm against the register specification.
 *
 * Slots that are not listed are NULL; the fault handler prints "unknown"
 * for them. The pointer table itself is const so it is read-only.
 */
static const char * const mmhub_client_ids_vega20[][2] = {
	[0][0] = "XDMA",
	[1][0] = "DCE",
	[2][0] = "VCE0",
	[3][0] = "VCE0U",
	[4][0] = "UVD",
	[5][0] = "UVD1U",
	[13][0] = "OSS",
	[14][0] = "HDP",
	[15][0] = "SDMA0",
	[32+0][0] = "UVD",
	[32+1][0] = "UVDU",
	[32+2][0] = "MP1",
	[32+3][0] = "MP0",
	[32+12][0] = "UTCL2",
	[32+14][0] = "SDMA1",
	[0][1] = "XDMA",
	[1][1] = "DCE",
	[2][1] = "DCEDWB",
	[3][1] = "VCE0",
	[4][1] = "VCE0U",
	[5][1] = "UVD1",
	[6][1] = "UVD1U",
	[7][1] = "DBGU0",
	[8][1] = "XDP",
	[13][1] = "OSS",
	[14][1] = "HDP",
	[15][1] = "SDMA0",
	[32+0][1] = "UVD",
	[32+1][1] = "UVDU",
	[32+2][1] = "DBGU1",
	[32+3][1] = "MP1",
	[32+4][1] = "MP0",
	[32+14][1] = "SDMA1",
};
|---|
| 242 | + |
|---|
/*
 * MM hub UTCL2 client names for Arcturus, looked up as [cid][rw] with the
 * CID and RW fields of VM_L2_PROTECTION_FAULT_STATUS.
 * NOTE(review): rw == 0 presumably means a read access and rw == 1 a
 * write access - confirm against the register specification.
 *
 * Slots that are not listed are NULL; the fault handler prints "unknown"
 * for them. The pointer table itself is const so it is read-only.
 */
static const char * const mmhub_client_ids_arcturus[][2] = {
	[2][0] = "MP1",
	[3][0] = "MP0",
	[10][0] = "UTCL2",
	[13][0] = "OSS",
	[14][0] = "HDP",
	[15][0] = "SDMA0",
	[32+15][0] = "SDMA1",
	[64+15][0] = "SDMA2",
	[96+15][0] = "SDMA3",
	[128+15][0] = "SDMA4",
	[160+11][0] = "JPEG",
	[160+12][0] = "VCN",
	[160+13][0] = "VCNU",
	[160+15][0] = "SDMA5",
	[192+10][0] = "UTCL2",
	[192+11][0] = "JPEG1",
	[192+12][0] = "VCN1",
	[192+13][0] = "VCN1U",
	[192+15][0] = "SDMA6",
	[224+15][0] = "SDMA7",
	[0][1] = "DBGU1",
	[1][1] = "XDP",
	[2][1] = "MP1",
	[3][1] = "MP0",
	[13][1] = "OSS",
	[14][1] = "HDP",
	[15][1] = "SDMA0",
	[32+15][1] = "SDMA1",
	[64+15][1] = "SDMA2",
	[96+15][1] = "SDMA3",
	[128+15][1] = "SDMA4",
	[160+11][1] = "JPEG",
	[160+12][1] = "VCN",
	[160+13][1] = "VCNU",
	[160+15][1] = "SDMA5",
	[192+11][1] = "JPEG1",
	[192+12][1] = "VCN1",
	[192+13][1] = "VCN1U",
	[192+15][1] = "SDMA6",
	[224+15][1] = "SDMA7",
};
|---|
| 58 | 285 | |
|---|
| 59 | 286 | static const u32 golden_settings_vega10_hdp[] = |
|---|
| 60 | 287 | { |
|---|
| .. | .. |
|---|
| 82 | 309 | SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008) |
|---|
| 83 | 310 | }; |
|---|
| 84 | 311 | |
|---|
| 85 | | -/* Ecc related register addresses, (BASE + reg offset) */ |
|---|
| 86 | | -/* Universal Memory Controller caps (may be fused). */ |
|---|
| 87 | | -/* UMCCH:UmcLocalCap */ |
|---|
| 88 | | -#define UMCLOCALCAPS_ADDR0 (0x00014306 + 0x00000000) |
|---|
| 89 | | -#define UMCLOCALCAPS_ADDR1 (0x00014306 + 0x00000800) |
|---|
| 90 | | -#define UMCLOCALCAPS_ADDR2 (0x00014306 + 0x00001000) |
|---|
| 91 | | -#define UMCLOCALCAPS_ADDR3 (0x00014306 + 0x00001800) |
|---|
| 92 | | -#define UMCLOCALCAPS_ADDR4 (0x00054306 + 0x00000000) |
|---|
| 93 | | -#define UMCLOCALCAPS_ADDR5 (0x00054306 + 0x00000800) |
|---|
| 94 | | -#define UMCLOCALCAPS_ADDR6 (0x00054306 + 0x00001000) |
|---|
| 95 | | -#define UMCLOCALCAPS_ADDR7 (0x00054306 + 0x00001800) |
|---|
| 96 | | -#define UMCLOCALCAPS_ADDR8 (0x00094306 + 0x00000000) |
|---|
| 97 | | -#define UMCLOCALCAPS_ADDR9 (0x00094306 + 0x00000800) |
|---|
| 98 | | -#define UMCLOCALCAPS_ADDR10 (0x00094306 + 0x00001000) |
|---|
| 99 | | -#define UMCLOCALCAPS_ADDR11 (0x00094306 + 0x00001800) |
|---|
| 100 | | -#define UMCLOCALCAPS_ADDR12 (0x000d4306 + 0x00000000) |
|---|
| 101 | | -#define UMCLOCALCAPS_ADDR13 (0x000d4306 + 0x00000800) |
|---|
| 102 | | -#define UMCLOCALCAPS_ADDR14 (0x000d4306 + 0x00001000) |
|---|
| 103 | | -#define UMCLOCALCAPS_ADDR15 (0x000d4306 + 0x00001800) |
|---|
| 104 | | - |
|---|
| 105 | | -/* Universal Memory Controller Channel config. */ |
|---|
| 106 | | -/* UMCCH:UMC_CONFIG */ |
|---|
| 107 | | -#define UMCCH_UMC_CONFIG_ADDR0 (0x00014040 + 0x00000000) |
|---|
| 108 | | -#define UMCCH_UMC_CONFIG_ADDR1 (0x00014040 + 0x00000800) |
|---|
| 109 | | -#define UMCCH_UMC_CONFIG_ADDR2 (0x00014040 + 0x00001000) |
|---|
| 110 | | -#define UMCCH_UMC_CONFIG_ADDR3 (0x00014040 + 0x00001800) |
|---|
| 111 | | -#define UMCCH_UMC_CONFIG_ADDR4 (0x00054040 + 0x00000000) |
|---|
| 112 | | -#define UMCCH_UMC_CONFIG_ADDR5 (0x00054040 + 0x00000800) |
|---|
| 113 | | -#define UMCCH_UMC_CONFIG_ADDR6 (0x00054040 + 0x00001000) |
|---|
| 114 | | -#define UMCCH_UMC_CONFIG_ADDR7 (0x00054040 + 0x00001800) |
|---|
| 115 | | -#define UMCCH_UMC_CONFIG_ADDR8 (0x00094040 + 0x00000000) |
|---|
| 116 | | -#define UMCCH_UMC_CONFIG_ADDR9 (0x00094040 + 0x00000800) |
|---|
| 117 | | -#define UMCCH_UMC_CONFIG_ADDR10 (0x00094040 + 0x00001000) |
|---|
| 118 | | -#define UMCCH_UMC_CONFIG_ADDR11 (0x00094040 + 0x00001800) |
|---|
| 119 | | -#define UMCCH_UMC_CONFIG_ADDR12 (0x000d4040 + 0x00000000) |
|---|
| 120 | | -#define UMCCH_UMC_CONFIG_ADDR13 (0x000d4040 + 0x00000800) |
|---|
| 121 | | -#define UMCCH_UMC_CONFIG_ADDR14 (0x000d4040 + 0x00001000) |
|---|
| 122 | | -#define UMCCH_UMC_CONFIG_ADDR15 (0x000d4040 + 0x00001800) |
|---|
| 123 | | - |
|---|
| 124 | | -/* Universal Memory Controller Channel Ecc config. */ |
|---|
| 125 | | -/* UMCCH:EccCtrl */ |
|---|
| 126 | | -#define UMCCH_ECCCTRL_ADDR0 (0x00014053 + 0x00000000) |
|---|
| 127 | | -#define UMCCH_ECCCTRL_ADDR1 (0x00014053 + 0x00000800) |
|---|
| 128 | | -#define UMCCH_ECCCTRL_ADDR2 (0x00014053 + 0x00001000) |
|---|
| 129 | | -#define UMCCH_ECCCTRL_ADDR3 (0x00014053 + 0x00001800) |
|---|
| 130 | | -#define UMCCH_ECCCTRL_ADDR4 (0x00054053 + 0x00000000) |
|---|
| 131 | | -#define UMCCH_ECCCTRL_ADDR5 (0x00054053 + 0x00000800) |
|---|
| 132 | | -#define UMCCH_ECCCTRL_ADDR6 (0x00054053 + 0x00001000) |
|---|
| 133 | | -#define UMCCH_ECCCTRL_ADDR7 (0x00054053 + 0x00001800) |
|---|
| 134 | | -#define UMCCH_ECCCTRL_ADDR8 (0x00094053 + 0x00000000) |
|---|
| 135 | | -#define UMCCH_ECCCTRL_ADDR9 (0x00094053 + 0x00000800) |
|---|
| 136 | | -#define UMCCH_ECCCTRL_ADDR10 (0x00094053 + 0x00001000) |
|---|
| 137 | | -#define UMCCH_ECCCTRL_ADDR11 (0x00094053 + 0x00001800) |
|---|
| 138 | | -#define UMCCH_ECCCTRL_ADDR12 (0x000d4053 + 0x00000000) |
|---|
| 139 | | -#define UMCCH_ECCCTRL_ADDR13 (0x000d4053 + 0x00000800) |
|---|
| 140 | | -#define UMCCH_ECCCTRL_ADDR14 (0x000d4053 + 0x00001000) |
|---|
| 141 | | -#define UMCCH_ECCCTRL_ADDR15 (0x000d4053 + 0x00001800) |
|---|
| 142 | | - |
|---|
| 143 | | -static const uint32_t ecc_umclocalcap_addrs[] = { |
|---|
| 144 | | - UMCLOCALCAPS_ADDR0, |
|---|
| 145 | | - UMCLOCALCAPS_ADDR1, |
|---|
| 146 | | - UMCLOCALCAPS_ADDR2, |
|---|
| 147 | | - UMCLOCALCAPS_ADDR3, |
|---|
| 148 | | - UMCLOCALCAPS_ADDR4, |
|---|
| 149 | | - UMCLOCALCAPS_ADDR5, |
|---|
| 150 | | - UMCLOCALCAPS_ADDR6, |
|---|
| 151 | | - UMCLOCALCAPS_ADDR7, |
|---|
| 152 | | - UMCLOCALCAPS_ADDR8, |
|---|
| 153 | | - UMCLOCALCAPS_ADDR9, |
|---|
| 154 | | - UMCLOCALCAPS_ADDR10, |
|---|
| 155 | | - UMCLOCALCAPS_ADDR11, |
|---|
| 156 | | - UMCLOCALCAPS_ADDR12, |
|---|
| 157 | | - UMCLOCALCAPS_ADDR13, |
|---|
| 158 | | - UMCLOCALCAPS_ADDR14, |
|---|
| 159 | | - UMCLOCALCAPS_ADDR15, |
|---|
/*
 * Register addresses written by gmc_v9_0_ecc_interrupt_state().
 *
 * The 32 addresses form eight groups spaced 0x00040000 apart, each holding
 * four entries spaced 0x00000800 apart, starting at 0x000143c0.
 */
#define GMC_V9_0_MCUMC_CTRL_ADDR(grp, slot) \
	(0x000143c0 + (grp) * 0x00040000 + (slot) * 0x00000800)
#define GMC_V9_0_MCUMC_CTRL_GROUP(grp) \
	GMC_V9_0_MCUMC_CTRL_ADDR(grp, 0), \
	GMC_V9_0_MCUMC_CTRL_ADDR(grp, 1), \
	GMC_V9_0_MCUMC_CTRL_ADDR(grp, 2), \
	GMC_V9_0_MCUMC_CTRL_ADDR(grp, 3)

static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	GMC_V9_0_MCUMC_CTRL_GROUP(0),
	GMC_V9_0_MCUMC_CTRL_GROUP(1),
	GMC_V9_0_MCUMC_CTRL_GROUP(2),
	GMC_V9_0_MCUMC_CTRL_GROUP(3),
	GMC_V9_0_MCUMC_CTRL_GROUP(4),
	GMC_V9_0_MCUMC_CTRL_GROUP(5),
	GMC_V9_0_MCUMC_CTRL_GROUP(6),
	GMC_V9_0_MCUMC_CTRL_GROUP(7),
};

#undef GMC_V9_0_MCUMC_CTRL_GROUP
#undef GMC_V9_0_MCUMC_CTRL_ADDR
|---|
| 161 | 346 | |
|---|
| 162 | | -static const uint32_t ecc_umcch_umc_config_addrs[] = { |
|---|
| 163 | | - UMCCH_UMC_CONFIG_ADDR0, |
|---|
| 164 | | - UMCCH_UMC_CONFIG_ADDR1, |
|---|
| 165 | | - UMCCH_UMC_CONFIG_ADDR2, |
|---|
| 166 | | - UMCCH_UMC_CONFIG_ADDR3, |
|---|
| 167 | | - UMCCH_UMC_CONFIG_ADDR4, |
|---|
| 168 | | - UMCCH_UMC_CONFIG_ADDR5, |
|---|
| 169 | | - UMCCH_UMC_CONFIG_ADDR6, |
|---|
| 170 | | - UMCCH_UMC_CONFIG_ADDR7, |
|---|
| 171 | | - UMCCH_UMC_CONFIG_ADDR8, |
|---|
| 172 | | - UMCCH_UMC_CONFIG_ADDR9, |
|---|
| 173 | | - UMCCH_UMC_CONFIG_ADDR10, |
|---|
| 174 | | - UMCCH_UMC_CONFIG_ADDR11, |
|---|
| 175 | | - UMCCH_UMC_CONFIG_ADDR12, |
|---|
| 176 | | - UMCCH_UMC_CONFIG_ADDR13, |
|---|
| 177 | | - UMCCH_UMC_CONFIG_ADDR14, |
|---|
| 178 | | - UMCCH_UMC_CONFIG_ADDR15, |
|---|
/*
 * Second set of register addresses written by
 * gmc_v9_0_ecc_interrupt_state().
 *
 * The 32 addresses form eight groups spaced 0x00040000 apart, each holding
 * four entries spaced 0x00000800 apart, starting at 0x000143e0.
 */
#define GMC_V9_0_MCUMC_CTRL_MASK_ADDR(grp, slot) \
	(0x000143e0 + (grp) * 0x00040000 + (slot) * 0x00000800)
#define GMC_V9_0_MCUMC_CTRL_MASK_GROUP(grp) \
	GMC_V9_0_MCUMC_CTRL_MASK_ADDR(grp, 0), \
	GMC_V9_0_MCUMC_CTRL_MASK_ADDR(grp, 1), \
	GMC_V9_0_MCUMC_CTRL_MASK_ADDR(grp, 2), \
	GMC_V9_0_MCUMC_CTRL_MASK_ADDR(grp, 3)

static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(0),
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(1),
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(2),
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(3),
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(4),
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(5),
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(6),
	GMC_V9_0_MCUMC_CTRL_MASK_GROUP(7),
};

#undef GMC_V9_0_MCUMC_CTRL_MASK_GROUP
#undef GMC_V9_0_MCUMC_CTRL_MASK_ADDR
|---|
| 180 | 381 | |
|---|
| 181 | | -static const uint32_t ecc_umcch_eccctrl_addrs[] = { |
|---|
| 182 | | - UMCCH_ECCCTRL_ADDR0, |
|---|
| 183 | | - UMCCH_ECCCTRL_ADDR1, |
|---|
| 184 | | - UMCCH_ECCCTRL_ADDR2, |
|---|
| 185 | | - UMCCH_ECCCTRL_ADDR3, |
|---|
| 186 | | - UMCCH_ECCCTRL_ADDR4, |
|---|
| 187 | | - UMCCH_ECCCTRL_ADDR5, |
|---|
| 188 | | - UMCCH_ECCCTRL_ADDR6, |
|---|
| 189 | | - UMCCH_ECCCTRL_ADDR7, |
|---|
| 190 | | - UMCCH_ECCCTRL_ADDR8, |
|---|
| 191 | | - UMCCH_ECCCTRL_ADDR9, |
|---|
| 192 | | - UMCCH_ECCCTRL_ADDR10, |
|---|
| 193 | | - UMCCH_ECCCTRL_ADDR11, |
|---|
| 194 | | - UMCCH_ECCCTRL_ADDR12, |
|---|
| 195 | | - UMCCH_ECCCTRL_ADDR13, |
|---|
| 196 | | - UMCCH_ECCCTRL_ADDR14, |
|---|
| 197 | | - UMCCH_ECCCTRL_ADDR15, |
|---|
/*
 * Register address table named for the UMC MCUMC status registers.
 *
 * The 32 addresses form eight groups spaced 0x00040000 apart, each holding
 * four entries spaced 0x00000800 apart, starting at 0x000143c2.
 */
#define GMC_V9_0_MCUMC_STATUS_ADDR(grp, slot) \
	(0x000143c2 + (grp) * 0x00040000 + (slot) * 0x00000800)
#define GMC_V9_0_MCUMC_STATUS_GROUP(grp) \
	GMC_V9_0_MCUMC_STATUS_ADDR(grp, 0), \
	GMC_V9_0_MCUMC_STATUS_ADDR(grp, 1), \
	GMC_V9_0_MCUMC_STATUS_ADDR(grp, 2), \
	GMC_V9_0_MCUMC_STATUS_ADDR(grp, 3)

static const uint32_t ecc_umc_mcumc_status_addrs[] = {
	GMC_V9_0_MCUMC_STATUS_GROUP(0),
	GMC_V9_0_MCUMC_STATUS_GROUP(1),
	GMC_V9_0_MCUMC_STATUS_GROUP(2),
	GMC_V9_0_MCUMC_STATUS_GROUP(3),
	GMC_V9_0_MCUMC_STATUS_GROUP(4),
	GMC_V9_0_MCUMC_STATUS_GROUP(5),
	GMC_V9_0_MCUMC_STATUS_GROUP(6),
	GMC_V9_0_MCUMC_STATUS_GROUP(7),
};

#undef GMC_V9_0_MCUMC_STATUS_GROUP
#undef GMC_V9_0_MCUMC_STATUS_ADDR
|---|
| 416 | + |
|---|
| 417 | +static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, |
|---|
| 418 | + struct amdgpu_irq_src *src, |
|---|
| 419 | + unsigned type, |
|---|
| 420 | + enum amdgpu_interrupt_state state) |
|---|
| 421 | +{ |
|---|
| 422 | + u32 bits, i, tmp, reg; |
|---|
| 423 | + |
|---|
| 424 | + /* Devices newer then VEGA10/12 shall have these programming |
|---|
| 425 | + sequences performed by PSP BL */ |
|---|
| 426 | + if (adev->asic_type >= CHIP_VEGA20) |
|---|
| 427 | + return 0; |
|---|
| 428 | + |
|---|
| 429 | + bits = 0x7f; |
|---|
| 430 | + |
|---|
| 431 | + switch (state) { |
|---|
| 432 | + case AMDGPU_IRQ_STATE_DISABLE: |
|---|
| 433 | + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) { |
|---|
| 434 | + reg = ecc_umc_mcumc_ctrl_addrs[i]; |
|---|
| 435 | + tmp = RREG32(reg); |
|---|
| 436 | + tmp &= ~bits; |
|---|
| 437 | + WREG32(reg, tmp); |
|---|
| 438 | + } |
|---|
| 439 | + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) { |
|---|
| 440 | + reg = ecc_umc_mcumc_ctrl_mask_addrs[i]; |
|---|
| 441 | + tmp = RREG32(reg); |
|---|
| 442 | + tmp &= ~bits; |
|---|
| 443 | + WREG32(reg, tmp); |
|---|
| 444 | + } |
|---|
| 445 | + break; |
|---|
| 446 | + case AMDGPU_IRQ_STATE_ENABLE: |
|---|
| 447 | + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) { |
|---|
| 448 | + reg = ecc_umc_mcumc_ctrl_addrs[i]; |
|---|
| 449 | + tmp = RREG32(reg); |
|---|
| 450 | + tmp |= bits; |
|---|
| 451 | + WREG32(reg, tmp); |
|---|
| 452 | + } |
|---|
| 453 | + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) { |
|---|
| 454 | + reg = ecc_umc_mcumc_ctrl_mask_addrs[i]; |
|---|
| 455 | + tmp = RREG32(reg); |
|---|
| 456 | + tmp |= bits; |
|---|
| 457 | + WREG32(reg, tmp); |
|---|
| 458 | + } |
|---|
| 459 | + break; |
|---|
| 460 | + default: |
|---|
| 461 | + break; |
|---|
| 462 | + } |
|---|
| 463 | + |
|---|
| 464 | + return 0; |
|---|
| 465 | +} |
|---|
| 199 | 466 | |
|---|
| 200 | 467 | static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, |
|---|
| 201 | 468 | struct amdgpu_irq_src *src, |
|---|
| .. | .. |
|---|
| 215 | 482 | |
|---|
| 216 | 483 | switch (state) { |
|---|
| 217 | 484 | case AMDGPU_IRQ_STATE_DISABLE: |
|---|
| 218 | | - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { |
|---|
| 485 | + for (j = 0; j < adev->num_vmhubs; j++) { |
|---|
| 219 | 486 | hub = &adev->vmhub[j]; |
|---|
| 220 | 487 | for (i = 0; i < 16; i++) { |
|---|
| 221 | 488 | reg = hub->vm_context0_cntl + i; |
|---|
| .. | .. |
|---|
| 226 | 493 | } |
|---|
| 227 | 494 | break; |
|---|
| 228 | 495 | case AMDGPU_IRQ_STATE_ENABLE: |
|---|
| 229 | | - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { |
|---|
| 496 | + for (j = 0; j < adev->num_vmhubs; j++) { |
|---|
| 230 | 497 | hub = &adev->vmhub[j]; |
|---|
| 231 | 498 | for (i = 0; i < 16; i++) { |
|---|
| 232 | 499 | reg = hub->vm_context0_cntl + i; |
|---|
| .. | .. |
|---|
| 246 | 513 | struct amdgpu_irq_src *source, |
|---|
| 247 | 514 | struct amdgpu_iv_entry *entry) |
|---|
| 248 | 515 | { |
|---|
| 249 | | - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; |
|---|
| 250 | | - uint32_t status = 0; |
|---|
| 516 | + struct amdgpu_vmhub *hub; |
|---|
| 517 | + bool retry_fault = !!(entry->src_data[1] & 0x80); |
|---|
| 518 | + uint32_t status = 0, cid = 0, rw = 0; |
|---|
| 251 | 519 | u64 addr; |
|---|
| 520 | + char hub_name[10]; |
|---|
| 521 | + const char *mmhub_cid; |
|---|
| 252 | 522 | |
|---|
| 253 | 523 | addr = (u64)entry->src_data[0] << 12; |
|---|
| 254 | 524 | addr |= ((u64)entry->src_data[1] & 0xf) << 44; |
|---|
| 255 | 525 | |
|---|
| 526 | + if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid, |
|---|
| 527 | + entry->timestamp)) |
|---|
| 528 | + return 1; /* This also prevents sending it to KFD */ |
|---|
| 529 | + |
|---|
| 530 | + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { |
|---|
| 531 | + snprintf(hub_name, sizeof(hub_name), "mmhub0"); |
|---|
| 532 | + hub = &adev->vmhub[AMDGPU_MMHUB_0]; |
|---|
| 533 | + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { |
|---|
| 534 | + snprintf(hub_name, sizeof(hub_name), "mmhub1"); |
|---|
| 535 | + hub = &adev->vmhub[AMDGPU_MMHUB_1]; |
|---|
| 536 | + } else { |
|---|
| 537 | + snprintf(hub_name, sizeof(hub_name), "gfxhub0"); |
|---|
| 538 | + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; |
|---|
| 539 | + } |
|---|
| 540 | + |
|---|
| 541 | + /* If it's the first fault for this address, process it normally */ |
|---|
| 542 | + if (retry_fault && !in_interrupt() && |
|---|
| 543 | + amdgpu_vm_handle_fault(adev, entry->pasid, addr)) |
|---|
| 544 | + return 1; /* This also prevents sending it to KFD */ |
|---|
| 545 | + |
|---|
| 256 | 546 | if (!amdgpu_sriov_vf(adev)) { |
|---|
| 547 | + /* |
|---|
| 548 | + * Issue a dummy read to wait for the status register to |
|---|
| 549 | + * be updated to avoid reading an incorrect value due to |
|---|
| 550 | + * the new fast GRBM interface. |
|---|
| 551 | + */ |
|---|
| 552 | + if (entry->vmid_src == AMDGPU_GFXHUB_0) |
|---|
| 553 | + RREG32(hub->vm_l2_pro_fault_status); |
|---|
| 554 | + |
|---|
| 257 | 555 | status = RREG32(hub->vm_l2_pro_fault_status); |
|---|
| 556 | + cid = REG_GET_FIELD(status, |
|---|
| 557 | + VM_L2_PROTECTION_FAULT_STATUS, CID); |
|---|
| 558 | + rw = REG_GET_FIELD(status, |
|---|
| 559 | + VM_L2_PROTECTION_FAULT_STATUS, RW); |
|---|
| 258 | 560 | WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); |
|---|
| 259 | 561 | } |
|---|
| 260 | 562 | |
|---|
| 261 | 563 | if (printk_ratelimit()) { |
|---|
| 262 | | - struct amdgpu_task_info task_info = { 0 }; |
|---|
| 564 | + struct amdgpu_task_info task_info; |
|---|
| 263 | 565 | |
|---|
| 566 | + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); |
|---|
| 264 | 567 | amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); |
|---|
| 265 | 568 | |
|---|
| 266 | 569 | dev_err(adev->dev, |
|---|
| 267 | | - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n", |
|---|
| 268 | | - entry->vmid_src ? "mmhub" : "gfxhub", |
|---|
| 570 | + "[%s] %s page fault (src_id:%u ring:%u vmid:%u " |
|---|
| 571 | + "pasid:%u, for process %s pid %d thread %s pid %d)\n", |
|---|
| 572 | + hub_name, retry_fault ? "retry" : "no-retry", |
|---|
| 269 | 573 | entry->src_id, entry->ring_id, entry->vmid, |
|---|
| 270 | 574 | entry->pasid, task_info.process_name, task_info.tgid, |
|---|
| 271 | 575 | task_info.task_name, task_info.pid); |
|---|
| 272 | | - dev_err(adev->dev, " at address 0x%016llx from %d\n", |
|---|
| 576 | + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", |
|---|
| 273 | 577 | addr, entry->client_id); |
|---|
| 274 | | - if (!amdgpu_sriov_vf(adev)) |
|---|
| 578 | + if (!amdgpu_sriov_vf(adev)) { |
|---|
| 275 | 579 | dev_err(adev->dev, |
|---|
| 276 | 580 | "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", |
|---|
| 277 | 581 | status); |
|---|
| 582 | + if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { |
|---|
| 583 | + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", |
|---|
| 584 | + cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], |
|---|
| 585 | + cid); |
|---|
| 586 | + } else { |
|---|
| 587 | + switch (adev->asic_type) { |
|---|
| 588 | + case CHIP_VEGA10: |
|---|
| 589 | + mmhub_cid = mmhub_client_ids_vega10[cid][rw]; |
|---|
| 590 | + break; |
|---|
| 591 | + case CHIP_VEGA12: |
|---|
| 592 | + mmhub_cid = mmhub_client_ids_vega12[cid][rw]; |
|---|
| 593 | + break; |
|---|
| 594 | + case CHIP_VEGA20: |
|---|
| 595 | + mmhub_cid = mmhub_client_ids_vega20[cid][rw]; |
|---|
| 596 | + break; |
|---|
| 597 | + case CHIP_ARCTURUS: |
|---|
| 598 | + mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; |
|---|
| 599 | + break; |
|---|
| 600 | + case CHIP_RAVEN: |
|---|
| 601 | + mmhub_cid = mmhub_client_ids_raven[cid][rw]; |
|---|
| 602 | + break; |
|---|
| 603 | + case CHIP_RENOIR: |
|---|
| 604 | + mmhub_cid = mmhub_client_ids_renoir[cid][rw]; |
|---|
| 605 | + break; |
|---|
| 606 | + default: |
|---|
| 607 | + mmhub_cid = NULL; |
|---|
| 608 | + break; |
|---|
| 609 | + } |
|---|
| 610 | + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", |
|---|
| 611 | + mmhub_cid ? mmhub_cid : "unknown", cid); |
|---|
| 612 | + } |
|---|
| 613 | + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", |
|---|
| 614 | + REG_GET_FIELD(status, |
|---|
| 615 | + VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); |
|---|
| 616 | + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", |
|---|
| 617 | + REG_GET_FIELD(status, |
|---|
| 618 | + VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); |
|---|
| 619 | + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", |
|---|
| 620 | + REG_GET_FIELD(status, |
|---|
| 621 | + VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); |
|---|
| 622 | + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", |
|---|
| 623 | + REG_GET_FIELD(status, |
|---|
| 624 | + VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); |
|---|
| 625 | + dev_err(adev->dev, "\t RW: 0x%x\n", rw); |
|---|
| 626 | + } |
|---|
| 278 | 627 | } |
|---|
| 279 | 628 | |
|---|
| 280 | 629 | return 0; |
|---|
| .. | .. |
|---|
| 285 | 634 | .process = gmc_v9_0_process_interrupt, |
|---|
| 286 | 635 | }; |
|---|
| 287 | 636 | |
|---|
| 637 | + |
|---|
| 638 | +static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = { |
|---|
| 639 | + .set = gmc_v9_0_ecc_interrupt_state, |
|---|
| 640 | + .process = amdgpu_umc_process_ecc_irq, |
|---|
| 641 | +}; |
|---|
| 642 | + |
|---|
| 288 | 643 | static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) |
|---|
| 289 | 644 | { |
|---|
| 290 | 645 | adev->gmc.vm_fault.num_types = 1; |
|---|
| 291 | 646 | adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; |
|---|
| 647 | + |
|---|
| 648 | + if (!amdgpu_sriov_vf(adev)) { |
|---|
| 649 | + adev->gmc.ecc_irq.num_types = 1; |
|---|
| 650 | + adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs; |
|---|
| 651 | + } |
|---|
| 292 | 652 | } |
|---|
| 293 | 653 | |
|---|
| 294 | | -static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) |
|---|
| 654 | +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, |
|---|
| 655 | + uint32_t flush_type) |
|---|
| 295 | 656 | { |
|---|
| 296 | 657 | u32 req = 0; |
|---|
| 297 | 658 | |
|---|
| 298 | | - /* invalidate using legacy mode on vmid*/ |
|---|
| 299 | 659 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, |
|---|
| 300 | 660 | PER_VMID_INVALIDATE_REQ, 1 << vmid); |
|---|
| 301 | | - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); |
|---|
| 661 | + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); |
|---|
| 302 | 662 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); |
|---|
| 303 | 663 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); |
|---|
| 304 | 664 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); |
|---|
| .. | .. |
|---|
| 310 | 670 | return req; |
|---|
| 311 | 671 | } |
|---|
| 312 | 672 | |
|---|
| 673 | +/** |
|---|
| 674 | + * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore |
|---|
| 675 | + * |
|---|
| 676 | + * @adev: amdgpu_device pointer |
|---|
| 677 | + * @vmhub: vmhub type |
|---|
| 678 | + * |
|---|
| 679 | + */ |
|---|
| 680 | +static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, |
|---|
| 681 | + uint32_t vmhub) |
|---|
| 682 | +{ |
|---|
| 683 | + return ((vmhub == AMDGPU_MMHUB_0 || |
|---|
| 684 | + vmhub == AMDGPU_MMHUB_1) && |
|---|
| 685 | + (!amdgpu_sriov_vf(adev)) && |
|---|
| 686 | + (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) && |
|---|
| 687 | + (adev->apu_flags & AMD_APU_IS_PICASSO)))); |
|---|
| 688 | +} |
|---|
| 689 | + |
|---|
| 690 | +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, |
|---|
| 691 | + uint8_t vmid, uint16_t *p_pasid) |
|---|
| 692 | +{ |
|---|
| 693 | + uint32_t value; |
|---|
| 694 | + |
|---|
| 695 | + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) |
|---|
| 696 | + + vmid); |
|---|
| 697 | + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; |
|---|
| 698 | + |
|---|
| 699 | + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); |
|---|
| 700 | +} |
|---|
| 701 | + |
|---|
| 313 | 702 | /* |
|---|
| 314 | 703 | * GART |
|---|
| 315 | 704 | * VMID 0 is the physical GPU addresses as used by the kernel. |
|---|
| .. | .. |
|---|
| 318 | 707 | */ |
|---|
| 319 | 708 | |
|---|
| 320 | 709 | /** |
|---|
| 321 | | - * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback |
|---|
| 710 | + * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type |
|---|
| 322 | 711 | * |
|---|
| 323 | 712 | * @adev: amdgpu_device pointer |
|---|
| 324 | 713 | * @vmid: vm instance to flush |
|---|
| 714 | + * @flush_type: the flush type |
|---|
| 325 | 715 | * |
|---|
| 326 | | - * Flush the TLB for the requested page table. |
|---|
| 716 | + * Flush the TLB for the requested page table using certain type. |
|---|
| 327 | 717 | */ |
|---|
| 328 | | -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, |
|---|
| 329 | | - uint32_t vmid) |
|---|
| 718 | +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, |
|---|
| 719 | + uint32_t vmhub, uint32_t flush_type) |
|---|
| 330 | 720 | { |
|---|
| 331 | | - /* Use register 17 for GART */ |
|---|
| 721 | + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); |
|---|
| 332 | 722 | const unsigned eng = 17; |
|---|
| 333 | | - unsigned i, j; |
|---|
| 723 | + u32 j, inv_req, inv_req2, tmp; |
|---|
| 724 | + struct amdgpu_vmhub *hub; |
|---|
| 725 | + |
|---|
| 726 | + BUG_ON(vmhub >= adev->num_vmhubs); |
|---|
| 727 | + |
|---|
| 728 | + hub = &adev->vmhub[vmhub]; |
|---|
| 729 | + if (adev->gmc.xgmi.num_physical_nodes && |
|---|
| 730 | + adev->asic_type == CHIP_VEGA20) { |
|---|
| 731 | + /* Vega20+XGMI caches PTEs in TC and TLB. Add a |
|---|
| 732 | + * heavy-weight TLB flush (type 2), which flushes |
|---|
| 733 | + * both. Due to a race condition with concurrent |
|---|
| 734 | + * memory accesses using the same TLB cache line, we |
|---|
| 735 | + * still need a second TLB flush after this. |
|---|
| 736 | + */ |
|---|
| 737 | + inv_req = gmc_v9_0_get_invalidate_req(vmid, 2); |
|---|
| 738 | + inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); |
|---|
| 739 | + } else { |
|---|
| 740 | + inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); |
|---|
| 741 | + inv_req2 = 0; |
|---|
| 742 | + } |
|---|
| 743 | + |
|---|
| 744 | + /* This is necessary for a HW workaround under SRIOV as well |
|---|
| 745 | + * as GFXOFF under bare metal |
|---|
| 746 | + */ |
|---|
| 747 | + if (adev->gfx.kiq.ring.sched.ready && |
|---|
| 748 | + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && |
|---|
| 749 | + down_read_trylock(&adev->reset_sem)) { |
|---|
| 750 | + uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; |
|---|
| 751 | + uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; |
|---|
| 752 | + |
|---|
| 753 | + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, |
|---|
| 754 | + 1 << vmid); |
|---|
| 755 | + up_read(&adev->reset_sem); |
|---|
| 756 | + return; |
|---|
| 757 | + } |
|---|
| 334 | 758 | |
|---|
| 335 | 759 | spin_lock(&adev->gmc.invalidate_lock); |
|---|
| 336 | 760 | |
|---|
| 337 | | - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { |
|---|
| 338 | | - struct amdgpu_vmhub *hub = &adev->vmhub[i]; |
|---|
| 339 | | - u32 tmp = gmc_v9_0_get_invalidate_req(vmid); |
|---|
| 761 | + /* |
|---|
| 762 | + * It may lose gpuvm invalidate acknowledge state across power-gating |
|---|
| 763 | + * off cycle, add semaphore acquire before invalidation and semaphore |
|---|
| 764 | + * release after invalidation to avoid entering power gated state |
|---|
| 765 | + * to work around the issue |
|---|
| 766 | + */ |
|---|
| 340 | 767 | |
|---|
| 341 | | - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); |
|---|
| 342 | | - |
|---|
| 343 | | - /* Busy wait for ACK.*/ |
|---|
| 344 | | - for (j = 0; j < 100; j++) { |
|---|
| 345 | | - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); |
|---|
| 346 | | - tmp &= 1 << vmid; |
|---|
| 347 | | - if (tmp) |
|---|
| 348 | | - break; |
|---|
| 349 | | - cpu_relax(); |
|---|
| 350 | | - } |
|---|
| 351 | | - if (j < 100) |
|---|
| 352 | | - continue; |
|---|
| 353 | | - |
|---|
| 354 | | - /* Wait for ACK with a delay.*/ |
|---|
| 768 | + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ |
|---|
| 769 | + if (use_semaphore) { |
|---|
| 355 | 770 | for (j = 0; j < adev->usec_timeout; j++) { |
|---|
| 356 | | - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); |
|---|
| 357 | | - tmp &= 1 << vmid; |
|---|
| 358 | | - if (tmp) |
|---|
| 771 | + /* a read return value of 1 means semaphore acquire */ |
|---|
| 772 | + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + |
|---|
| 773 | + hub->eng_distance * eng); |
|---|
| 774 | + if (tmp & 0x1) |
|---|
| 359 | 775 | break; |
|---|
| 360 | 776 | udelay(1); |
|---|
| 361 | 777 | } |
|---|
| 362 | | - if (j < adev->usec_timeout) |
|---|
| 363 | | - continue; |
|---|
| 364 | 778 | |
|---|
| 365 | | - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); |
|---|
| 779 | + if (j >= adev->usec_timeout) |
|---|
| 780 | + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); |
|---|
| 366 | 781 | } |
|---|
| 367 | 782 | |
|---|
| 783 | + do { |
|---|
| 784 | + WREG32_NO_KIQ(hub->vm_inv_eng0_req + |
|---|
| 785 | + hub->eng_distance * eng, inv_req); |
|---|
| 786 | + |
|---|
| 787 | + /* |
|---|
| 788 | + * Issue a dummy read to wait for the ACK register to |
|---|
| 789 | + * be cleared to avoid a false ACK due to the new fast |
|---|
| 790 | + * GRBM interface. |
|---|
| 791 | + */ |
|---|
| 792 | + if (vmhub == AMDGPU_GFXHUB_0) |
|---|
| 793 | + RREG32_NO_KIQ(hub->vm_inv_eng0_req + |
|---|
| 794 | + hub->eng_distance * eng); |
|---|
| 795 | + |
|---|
| 796 | + for (j = 0; j < adev->usec_timeout; j++) { |
|---|
| 797 | + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + |
|---|
| 798 | + hub->eng_distance * eng); |
|---|
| 799 | + if (tmp & (1 << vmid)) |
|---|
| 800 | + break; |
|---|
| 801 | + udelay(1); |
|---|
| 802 | + } |
|---|
| 803 | + |
|---|
| 804 | + inv_req = inv_req2; |
|---|
| 805 | + inv_req2 = 0; |
|---|
| 806 | + } while (inv_req); |
|---|
| 807 | + |
|---|
| 808 | + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ |
|---|
| 809 | + if (use_semaphore) |
|---|
| 810 | + /* |
|---|
| 811 | + * add semaphore release after invalidation, |
|---|
| 812 | + * write with 0 means semaphore release |
|---|
| 813 | + */ |
|---|
| 814 | + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + |
|---|
| 815 | + hub->eng_distance * eng, 0); |
|---|
| 816 | + |
|---|
| 368 | 817 | spin_unlock(&adev->gmc.invalidate_lock); |
|---|
| 818 | + |
|---|
| 819 | + if (j < adev->usec_timeout) |
|---|
| 820 | + return; |
|---|
| 821 | + |
|---|
| 822 | + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); |
|---|
| 823 | +} |
|---|
| 824 | + |
|---|
| 825 | +/** |
|---|
| 826 | + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid |
|---|
| 827 | + * |
|---|
| 828 | + * @adev: amdgpu_device pointer |
|---|
| 829 | + * @pasid: pasid to be flushed |
|---|
| 830 | + * |
|---|
| 831 | + * Flush the TLB for the requested pasid. |
|---|
| 832 | + */ |
|---|
| 833 | +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, |
|---|
| 834 | + uint16_t pasid, uint32_t flush_type, |
|---|
| 835 | + bool all_hub) |
|---|
| 836 | +{ |
|---|
| 837 | + int vmid, i; |
|---|
| 838 | + signed long r; |
|---|
| 839 | + uint32_t seq; |
|---|
| 840 | + uint16_t queried_pasid; |
|---|
| 841 | + bool ret; |
|---|
| 842 | + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; |
|---|
| 843 | + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; |
|---|
| 844 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
|---|
| 845 | + |
|---|
| 846 | + if (amdgpu_in_reset(adev)) |
|---|
| 847 | + return -EIO; |
|---|
| 848 | + |
|---|
| 849 | + if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) { |
|---|
| 850 | + /* Vega20+XGMI caches PTEs in TC and TLB. Add a |
|---|
| 851 | + * heavy-weight TLB flush (type 2), which flushes |
|---|
| 852 | + * both. Due to a race condition with concurrent |
|---|
| 853 | + * memory accesses using the same TLB cache line, we |
|---|
| 854 | + * still need a second TLB flush after this. |
|---|
| 855 | + */ |
|---|
| 856 | + bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes && |
|---|
| 857 | + adev->asic_type == CHIP_VEGA20); |
|---|
| 858 | + /* 2 dwords flush + 8 dwords fence */ |
|---|
| 859 | + unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8; |
|---|
| 860 | + |
|---|
| 861 | + if (vega20_xgmi_wa) |
|---|
| 862 | + ndw += kiq->pmf->invalidate_tlbs_size; |
|---|
| 863 | + |
|---|
| 864 | + spin_lock(&adev->gfx.kiq.ring_lock); |
|---|
| 865 | + /* 2 dwords flush + 8 dwords fence */ |
|---|
| 866 | + amdgpu_ring_alloc(ring, ndw); |
|---|
| 867 | + if (vega20_xgmi_wa) |
|---|
| 868 | + kiq->pmf->kiq_invalidate_tlbs(ring, |
|---|
| 869 | + pasid, 2, all_hub); |
|---|
| 870 | + kiq->pmf->kiq_invalidate_tlbs(ring, |
|---|
| 871 | + pasid, flush_type, all_hub); |
|---|
| 872 | + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); |
|---|
| 873 | + if (r) { |
|---|
| 874 | + amdgpu_ring_undo(ring); |
|---|
| 875 | + spin_unlock(&adev->gfx.kiq.ring_lock); |
|---|
| 876 | + up_read(&adev->reset_sem); |
|---|
| 877 | + return -ETIME; |
|---|
| 878 | + } |
|---|
| 879 | + |
|---|
| 880 | + amdgpu_ring_commit(ring); |
|---|
| 881 | + spin_unlock(&adev->gfx.kiq.ring_lock); |
|---|
| 882 | + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); |
|---|
| 883 | + if (r < 1) { |
|---|
| 884 | + dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); |
|---|
| 885 | + up_read(&adev->reset_sem); |
|---|
| 886 | + return -ETIME; |
|---|
| 887 | + } |
|---|
| 888 | + up_read(&adev->reset_sem); |
|---|
| 889 | + return 0; |
|---|
| 890 | + } |
|---|
| 891 | + |
|---|
| 892 | + for (vmid = 1; vmid < 16; vmid++) { |
|---|
| 893 | + |
|---|
| 894 | + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, |
|---|
| 895 | + &queried_pasid); |
|---|
| 896 | + if (ret && queried_pasid == pasid) { |
|---|
| 897 | + if (all_hub) { |
|---|
| 898 | + for (i = 0; i < adev->num_vmhubs; i++) |
|---|
| 899 | + gmc_v9_0_flush_gpu_tlb(adev, vmid, |
|---|
| 900 | + i, flush_type); |
|---|
| 901 | + } else { |
|---|
| 902 | + gmc_v9_0_flush_gpu_tlb(adev, vmid, |
|---|
| 903 | + AMDGPU_GFXHUB_0, flush_type); |
|---|
| 904 | + } |
|---|
| 905 | + break; |
|---|
| 906 | + } |
|---|
| 907 | + } |
|---|
| 908 | + |
|---|
| 909 | + return 0; |
|---|
| 910 | + |
|---|
| 369 | 911 | } |
|---|
| 370 | 912 | |
|---|
| 371 | 913 | static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, |
|---|
| 372 | 914 | unsigned vmid, uint64_t pd_addr) |
|---|
| 373 | 915 | { |
|---|
| 916 | + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); |
|---|
| 374 | 917 | struct amdgpu_device *adev = ring->adev; |
|---|
| 375 | 918 | struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; |
|---|
| 376 | | - uint32_t req = gmc_v9_0_get_invalidate_req(vmid); |
|---|
| 377 | | - uint64_t flags = AMDGPU_PTE_VALID; |
|---|
| 919 | + uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); |
|---|
| 378 | 920 | unsigned eng = ring->vm_inv_eng; |
|---|
| 379 | 921 | |
|---|
| 380 | | - amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags); |
|---|
| 381 | | - pd_addr |= flags; |
|---|
| 922 | + /* |
|---|
| 923 | + * It may lose gpuvm invalidate acknowledge state across power-gating |
|---|
| 924 | + * off cycle, add semaphore acquire before invalidation and semaphore |
|---|
| 925 | + * release after invalidation to avoid entering power gated state |
|---|
| 926 | + * to work around the issue |
|---|
| 927 | + */ |
|---|
| 382 | 928 | |
|---|
| 383 | | - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), |
|---|
| 929 | + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ |
|---|
| 930 | + if (use_semaphore) |
|---|
| 931 | + /* a read return value of 1 means semaphore acquire */ |
|---|
| 932 | + amdgpu_ring_emit_reg_wait(ring, |
|---|
| 933 | + hub->vm_inv_eng0_sem + |
|---|
| 934 | + hub->eng_distance * eng, 0x1, 0x1); |
|---|
| 935 | + |
|---|
| 936 | + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + |
|---|
| 937 | + (hub->ctx_addr_distance * vmid), |
|---|
| 384 | 938 | lower_32_bits(pd_addr)); |
|---|
| 385 | 939 | |
|---|
| 386 | | - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), |
|---|
| 940 | + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + |
|---|
| 941 | + (hub->ctx_addr_distance * vmid), |
|---|
| 387 | 942 | upper_32_bits(pd_addr)); |
|---|
| 388 | 943 | |
|---|
| 389 | | - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, |
|---|
| 390 | | - hub->vm_inv_eng0_ack + eng, |
|---|
| 944 | + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + |
|---|
| 945 | + hub->eng_distance * eng, |
|---|
| 946 | + hub->vm_inv_eng0_ack + |
|---|
| 947 | + hub->eng_distance * eng, |
|---|
| 391 | 948 | req, 1 << vmid); |
|---|
| 949 | + |
|---|
| 950 | + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ |
|---|
| 951 | + if (use_semaphore) |
|---|
| 952 | + /* |
|---|
| 953 | + * add semaphore release after invalidation, |
|---|
| 954 | + * write with 0 means semaphore release |
|---|
| 955 | + */ |
|---|
| 956 | + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + |
|---|
| 957 | + hub->eng_distance * eng, 0); |
|---|
| 392 | 958 | |
|---|
| 393 | 959 | return pd_addr; |
|---|
| 394 | 960 | } |
|---|
| .. | .. |
|---|
| 399 | 965 | struct amdgpu_device *adev = ring->adev; |
|---|
| 400 | 966 | uint32_t reg; |
|---|
| 401 | 967 | |
|---|
| 402 | | - if (ring->funcs->vmhub == AMDGPU_GFXHUB) |
|---|
| 968 | + /* Do nothing because there's no lut register for mmhub1. */ |
|---|
| 969 | + if (ring->funcs->vmhub == AMDGPU_MMHUB_1) |
|---|
| 970 | + return; |
|---|
| 971 | + |
|---|
| 972 | + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) |
|---|
| 403 | 973 | reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; |
|---|
| 404 | 974 | else |
|---|
| 405 | 975 | reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; |
|---|
| .. | .. |
|---|
| 407 | 977 | amdgpu_ring_emit_wreg(ring, reg, pasid); |
|---|
| 408 | 978 | } |
|---|
| 409 | 979 | |
|---|
| 410 | | -/** |
|---|
| 411 | | - * gmc_v9_0_set_pte_pde - update the page tables using MMIO |
|---|
| 980 | +/* |
|---|
| 981 | + * PTE format on VEGA 10: |
|---|
| 982 | + * 63:59 reserved |
|---|
| 983 | + * 58:57 mtype |
|---|
| 984 | + * 56 F |
|---|
| 985 | + * 55 L |
|---|
| 986 | + * 54 P |
|---|
| 987 | + * 53 SW |
|---|
| 988 | + * 52 T |
|---|
| 989 | + * 50:48 reserved |
|---|
| 990 | + * 47:12 4k physical page base address |
|---|
| 991 | + * 11:7 fragment |
|---|
| 992 | + * 6 write |
|---|
| 993 | + * 5 read |
|---|
| 994 | + * 4 exe |
|---|
| 995 | + * 3 Z |
|---|
| 996 | + * 2 snooped |
|---|
| 997 | + * 1 system |
|---|
| 998 | + * 0 valid |
|---|
| 412 | 999 | * |
|---|
| 413 | | - * @adev: amdgpu_device pointer |
|---|
| 414 | | - * @cpu_pt_addr: cpu address of the page table |
|---|
| 415 | | - * @gpu_page_idx: entry in the page table to update |
|---|
| 416 | | - * @addr: dst addr to write into pte/pde |
|---|
| 417 | | - * @flags: access flags |
|---|
| 418 | | - * |
|---|
| 419 | | - * Update the page tables using the CPU. |
|---|
| 1000 | + * PDE format on VEGA 10: |
|---|
| 1001 | + * 63:59 block fragment size |
|---|
| 1002 | + * 58:55 reserved |
|---|
| 1003 | + * 54 P |
|---|
| 1004 | + * 53:48 reserved |
|---|
| 1005 | + * 47:6 physical base address of PD or PTE |
|---|
| 1006 | + * 5:3 reserved |
|---|
| 1007 | + * 2 C |
|---|
| 1008 | + * 1 system |
|---|
| 1009 | + * 0 valid |
|---|
| 420 | 1010 | */ |
|---|
| 421 | | -static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, |
|---|
| 422 | | - uint32_t gpu_page_idx, uint64_t addr, |
|---|
| 423 | | - uint64_t flags) |
|---|
| 424 | | -{ |
|---|
| 425 | | - void __iomem *ptr = (void *)cpu_pt_addr; |
|---|
| 426 | | - uint64_t value; |
|---|
| 427 | 1011 | |
|---|
| 428 | | - /* |
|---|
| 429 | | - * PTE format on VEGA 10: |
|---|
| 430 | | - * 63:59 reserved |
|---|
| 431 | | - * 58:57 mtype |
|---|
| 432 | | - * 56 F |
|---|
| 433 | | - * 55 L |
|---|
| 434 | | - * 54 P |
|---|
| 435 | | - * 53 SW |
|---|
| 436 | | - * 52 T |
|---|
| 437 | | - * 50:48 reserved |
|---|
| 438 | | - * 47:12 4k physical page base address |
|---|
| 439 | | - * 11:7 fragment |
|---|
| 440 | | - * 6 write |
|---|
| 441 | | - * 5 read |
|---|
| 442 | | - * 4 exe |
|---|
| 443 | | - * 3 Z |
|---|
| 444 | | - * 2 snooped |
|---|
| 445 | | - * 1 system |
|---|
| 446 | | - * 0 valid |
|---|
| 447 | | - * |
|---|
| 448 | | - * PDE format on VEGA 10: |
|---|
| 449 | | - * 63:59 block fragment size |
|---|
| 450 | | - * 58:55 reserved |
|---|
| 451 | | - * 54 P |
|---|
| 452 | | - * 53:48 reserved |
|---|
| 453 | | - * 47:6 physical base address of PD or PTE |
|---|
| 454 | | - * 5:3 reserved |
|---|
| 455 | | - * 2 C |
|---|
| 456 | | - * 1 system |
|---|
| 457 | | - * 0 valid |
|---|
| 458 | | - */ |
|---|
| 459 | | - |
|---|
| 460 | | - /* |
|---|
| 461 | | - * The following is for PTE only. GART does not have PDEs. |
|---|
| 462 | | - */ |
|---|
| 463 | | - value = addr & 0x0000FFFFFFFFF000ULL; |
|---|
| 464 | | - value |= flags; |
|---|
| 465 | | - writeq(value, ptr + (gpu_page_idx * 8)); |
|---|
| 466 | | - return 0; |
|---|
| 467 | | -} |
|---|
| 468 | | - |
|---|
| 469 | | -static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, |
|---|
| 470 | | - uint32_t flags) |
|---|
| 1012 | +static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) |
|---|
| 471 | 1013 | |
|---|
| 472 | 1014 | { |
|---|
| 473 | | - uint64_t pte_flag = 0; |
|---|
| 474 | | - |
|---|
| 475 | | - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) |
|---|
| 476 | | - pte_flag |= AMDGPU_PTE_EXECUTABLE; |
|---|
| 477 | | - if (flags & AMDGPU_VM_PAGE_READABLE) |
|---|
| 478 | | - pte_flag |= AMDGPU_PTE_READABLE; |
|---|
| 479 | | - if (flags & AMDGPU_VM_PAGE_WRITEABLE) |
|---|
| 480 | | - pte_flag |= AMDGPU_PTE_WRITEABLE; |
|---|
| 481 | | - |
|---|
| 482 | | - switch (flags & AMDGPU_VM_MTYPE_MASK) { |
|---|
| 1015 | + switch (flags) { |
|---|
| 483 | 1016 | case AMDGPU_VM_MTYPE_DEFAULT: |
|---|
| 484 | | - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); |
|---|
| 485 | | - break; |
|---|
| 1017 | + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); |
|---|
| 486 | 1018 | case AMDGPU_VM_MTYPE_NC: |
|---|
| 487 | | - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); |
|---|
| 488 | | - break; |
|---|
| 1019 | + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); |
|---|
| 489 | 1020 | case AMDGPU_VM_MTYPE_WC: |
|---|
| 490 | | - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_WC); |
|---|
| 491 | | - break; |
|---|
| 1021 | + return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); |
|---|
| 1022 | + case AMDGPU_VM_MTYPE_RW: |
|---|
| 1023 | + return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW); |
|---|
| 492 | 1024 | case AMDGPU_VM_MTYPE_CC: |
|---|
| 493 | | - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_CC); |
|---|
| 494 | | - break; |
|---|
| 1025 | + return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); |
|---|
| 495 | 1026 | case AMDGPU_VM_MTYPE_UC: |
|---|
| 496 | | - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_UC); |
|---|
| 497 | | - break; |
|---|
| 1027 | + return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); |
|---|
| 498 | 1028 | default: |
|---|
| 499 | | - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); |
|---|
| 500 | | - break; |
|---|
| 1029 | + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); |
|---|
| 501 | 1030 | } |
|---|
| 502 | | - |
|---|
| 503 | | - if (flags & AMDGPU_VM_PAGE_PRT) |
|---|
| 504 | | - pte_flag |= AMDGPU_PTE_PRT; |
|---|
| 505 | | - |
|---|
| 506 | | - return pte_flag; |
|---|
| 507 | 1031 | } |
|---|
| 508 | 1032 | |
|---|
| 509 | 1033 | static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, |
|---|
| 510 | 1034 | uint64_t *addr, uint64_t *flags) |
|---|
| 511 | 1035 | { |
|---|
| 512 | | - if (!(*flags & AMDGPU_PDE_PTE)) |
|---|
| 1036 | + if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) |
|---|
| 513 | 1037 | *addr = adev->vm_manager.vram_base_offset + *addr - |
|---|
| 514 | 1038 | adev->gmc.vram_start; |
|---|
| 515 | 1039 | BUG_ON(*addr & 0xFFFF00000000003FULL); |
|---|
| .. | .. |
|---|
| 530 | 1054 | } |
|---|
| 531 | 1055 | } |
|---|
| 532 | 1056 | |
|---|
| 1057 | +static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, |
|---|
| 1058 | + struct amdgpu_bo_va_mapping *mapping, |
|---|
| 1059 | + uint64_t *flags) |
|---|
| 1060 | +{ |
|---|
| 1061 | + *flags &= ~AMDGPU_PTE_EXECUTABLE; |
|---|
| 1062 | + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; |
|---|
| 1063 | + |
|---|
| 1064 | + *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; |
|---|
| 1065 | + *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK; |
|---|
| 1066 | + |
|---|
| 1067 | + if (mapping->flags & AMDGPU_PTE_PRT) { |
|---|
| 1068 | + *flags |= AMDGPU_PTE_PRT; |
|---|
| 1069 | + *flags &= ~AMDGPU_PTE_VALID; |
|---|
| 1070 | + } |
|---|
| 1071 | + |
|---|
| 1072 | + if (adev->asic_type == CHIP_ARCTURUS && |
|---|
| 1073 | + !(*flags & AMDGPU_PTE_SYSTEM) && |
|---|
| 1074 | + mapping->bo_va->is_xgmi) |
|---|
| 1075 | + *flags |= AMDGPU_PTE_SNOOPED; |
|---|
| 1076 | +} |
|---|
| 1077 | + |
|---|
| 1078 | +static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) |
|---|
| 1079 | +{ |
|---|
| 1080 | + u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); |
|---|
| 1081 | + unsigned size; |
|---|
| 1082 | + |
|---|
| 1083 | + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { |
|---|
| 1084 | + size = AMDGPU_VBIOS_VGA_ALLOCATION; |
|---|
| 1085 | + } else { |
|---|
| 1086 | + u32 viewport; |
|---|
| 1087 | + |
|---|
| 1088 | + switch (adev->asic_type) { |
|---|
| 1089 | + case CHIP_RAVEN: |
|---|
| 1090 | + case CHIP_RENOIR: |
|---|
| 1091 | + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); |
|---|
| 1092 | + size = (REG_GET_FIELD(viewport, |
|---|
| 1093 | + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * |
|---|
| 1094 | + REG_GET_FIELD(viewport, |
|---|
| 1095 | + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * |
|---|
| 1096 | + 4); |
|---|
| 1097 | + break; |
|---|
| 1098 | + case CHIP_VEGA10: |
|---|
| 1099 | + case CHIP_VEGA12: |
|---|
| 1100 | + case CHIP_VEGA20: |
|---|
| 1101 | + default: |
|---|
| 1102 | + viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); |
|---|
| 1103 | + size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * |
|---|
| 1104 | + REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) * |
|---|
| 1105 | + 4); |
|---|
| 1106 | + break; |
|---|
| 1107 | + } |
|---|
| 1108 | + } |
|---|
| 1109 | + |
|---|
| 1110 | + return size; |
|---|
| 1111 | +} |
|---|
| 1112 | + |
|---|
| 533 | 1113 | static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { |
|---|
| 534 | 1114 | .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, |
|---|
| 1115 | + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, |
|---|
| 535 | 1116 | .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, |
|---|
| 536 | 1117 | .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, |
|---|
| 537 | | - .set_pte_pde = gmc_v9_0_set_pte_pde, |
|---|
| 538 | | - .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, |
|---|
| 539 | | - .get_vm_pde = gmc_v9_0_get_vm_pde |
|---|
| 1118 | + .map_mtype = gmc_v9_0_map_mtype, |
|---|
| 1119 | + .get_vm_pde = gmc_v9_0_get_vm_pde, |
|---|
| 1120 | + .get_vm_pte = gmc_v9_0_get_vm_pte, |
|---|
| 1121 | + .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, |
|---|
| 540 | 1122 | }; |
|---|
| 541 | 1123 | |
|---|
| 542 | 1124 | static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) |
|---|
| 543 | 1125 | { |
|---|
| 544 | | - if (adev->gmc.gmc_funcs == NULL) |
|---|
| 545 | | - adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; |
|---|
| 1126 | + adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; |
|---|
| 1127 | +} |
|---|
| 1128 | + |
|---|
| 1129 | +static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) |
|---|
| 1130 | +{ |
|---|
| 1131 | + switch (adev->asic_type) { |
|---|
| 1132 | + case CHIP_VEGA10: |
|---|
| 1133 | + adev->umc.funcs = &umc_v6_0_funcs; |
|---|
| 1134 | + break; |
|---|
| 1135 | + case CHIP_VEGA20: |
|---|
| 1136 | + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; |
|---|
| 1137 | + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; |
|---|
| 1138 | + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; |
|---|
| 1139 | + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; |
|---|
| 1140 | + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; |
|---|
| 1141 | + adev->umc.funcs = &umc_v6_1_funcs; |
|---|
| 1142 | + break; |
|---|
| 1143 | + case CHIP_ARCTURUS: |
|---|
| 1144 | + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; |
|---|
| 1145 | + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; |
|---|
| 1146 | + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; |
|---|
| 1147 | + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; |
|---|
| 1148 | + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; |
|---|
| 1149 | + adev->umc.funcs = &umc_v6_1_funcs; |
|---|
| 1150 | + break; |
|---|
| 1151 | + default: |
|---|
| 1152 | + break; |
|---|
| 1153 | + } |
|---|
| 1154 | +} |
|---|
| 1155 | + |
|---|
| 1156 | +static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) |
|---|
| 1157 | +{ |
|---|
| 1158 | + switch (adev->asic_type) { |
|---|
| 1159 | + case CHIP_ARCTURUS: |
|---|
| 1160 | + adev->mmhub.funcs = &mmhub_v9_4_funcs; |
|---|
| 1161 | + break; |
|---|
| 1162 | + default: |
|---|
| 1163 | + adev->mmhub.funcs = &mmhub_v1_0_funcs; |
|---|
| 1164 | + break; |
|---|
| 1165 | + } |
|---|
| 1166 | +} |
|---|
| 1167 | + |
|---|
| 1168 | +static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) |
|---|
| 1169 | +{ |
|---|
| 1170 | + switch (adev->asic_type) { |
|---|
| 1171 | + case CHIP_ARCTURUS: |
|---|
| 1172 | + case CHIP_VEGA20: |
|---|
| 1173 | + adev->gfxhub.funcs = &gfxhub_v1_1_funcs; |
|---|
| 1174 | + break; |
|---|
| 1175 | + default: |
|---|
| 1176 | + adev->gfxhub.funcs = &gfxhub_v1_0_funcs; |
|---|
| 1177 | + break; |
|---|
| 1178 | + } |
|---|
| 546 | 1179 | } |
|---|
| 547 | 1180 | |
|---|
| 548 | 1181 | static int gmc_v9_0_early_init(void *handle) |
|---|
| .. | .. |
|---|
| 551 | 1184 | |
|---|
| 552 | 1185 | gmc_v9_0_set_gmc_funcs(adev); |
|---|
| 553 | 1186 | gmc_v9_0_set_irq_funcs(adev); |
|---|
| 1187 | + gmc_v9_0_set_umc_funcs(adev); |
|---|
| 1188 | + gmc_v9_0_set_mmhub_funcs(adev); |
|---|
| 1189 | + gmc_v9_0_set_gfxhub_funcs(adev); |
|---|
| 554 | 1190 | |
|---|
| 555 | 1191 | adev->gmc.shared_aperture_start = 0x2000000000000000ULL; |
|---|
| 556 | 1192 | adev->gmc.shared_aperture_end = |
|---|
| .. | .. |
|---|
| 562 | 1198 | return 0; |
|---|
| 563 | 1199 | } |
|---|
| 564 | 1200 | |
|---|
| 565 | | -static int gmc_v9_0_ecc_available(struct amdgpu_device *adev) |
|---|
| 566 | | -{ |
|---|
| 567 | | - uint32_t reg_val; |
|---|
| 568 | | - uint32_t reg_addr; |
|---|
| 569 | | - uint32_t field_val; |
|---|
| 570 | | - size_t i; |
|---|
| 571 | | - uint32_t fv2; |
|---|
| 572 | | - size_t lost_sheep; |
|---|
| 573 | | - |
|---|
| 574 | | - DRM_DEBUG("ecc: gmc_v9_0_ecc_available()\n"); |
|---|
| 575 | | - |
|---|
| 576 | | - lost_sheep = 0; |
|---|
| 577 | | - for (i = 0; i < ARRAY_SIZE(ecc_umclocalcap_addrs); ++i) { |
|---|
| 578 | | - reg_addr = ecc_umclocalcap_addrs[i]; |
|---|
| 579 | | - DRM_DEBUG("ecc: " |
|---|
| 580 | | - "UMCCH_UmcLocalCap[%zu]: reg_addr: 0x%08x\n", |
|---|
| 581 | | - i, reg_addr); |
|---|
| 582 | | - reg_val = RREG32(reg_addr); |
|---|
| 583 | | - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_UmcLocalCap, |
|---|
| 584 | | - EccDis); |
|---|
| 585 | | - DRM_DEBUG("ecc: " |
|---|
| 586 | | - "reg_val: 0x%08x, " |
|---|
| 587 | | - "EccDis: 0x%08x, ", |
|---|
| 588 | | - reg_val, field_val); |
|---|
| 589 | | - if (field_val) { |
|---|
| 590 | | - DRM_ERROR("ecc: UmcLocalCap:EccDis is set.\n"); |
|---|
| 591 | | - ++lost_sheep; |
|---|
| 592 | | - } |
|---|
| 593 | | - } |
|---|
| 594 | | - |
|---|
| 595 | | - for (i = 0; i < ARRAY_SIZE(ecc_umcch_umc_config_addrs); ++i) { |
|---|
| 596 | | - reg_addr = ecc_umcch_umc_config_addrs[i]; |
|---|
| 597 | | - DRM_DEBUG("ecc: " |
|---|
| 598 | | - "UMCCH0_0_UMC_CONFIG[%zu]: reg_addr: 0x%08x", |
|---|
| 599 | | - i, reg_addr); |
|---|
| 600 | | - reg_val = RREG32(reg_addr); |
|---|
| 601 | | - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_UMC_CONFIG, |
|---|
| 602 | | - DramReady); |
|---|
| 603 | | - DRM_DEBUG("ecc: " |
|---|
| 604 | | - "reg_val: 0x%08x, " |
|---|
| 605 | | - "DramReady: 0x%08x\n", |
|---|
| 606 | | - reg_val, field_val); |
|---|
| 607 | | - |
|---|
| 608 | | - if (!field_val) { |
|---|
| 609 | | - DRM_ERROR("ecc: UMC_CONFIG:DramReady is not set.\n"); |
|---|
| 610 | | - ++lost_sheep; |
|---|
| 611 | | - } |
|---|
| 612 | | - } |
|---|
| 613 | | - |
|---|
| 614 | | - for (i = 0; i < ARRAY_SIZE(ecc_umcch_eccctrl_addrs); ++i) { |
|---|
| 615 | | - reg_addr = ecc_umcch_eccctrl_addrs[i]; |
|---|
| 616 | | - DRM_DEBUG("ecc: " |
|---|
| 617 | | - "UMCCH_EccCtrl[%zu]: reg_addr: 0x%08x, ", |
|---|
| 618 | | - i, reg_addr); |
|---|
| 619 | | - reg_val = RREG32(reg_addr); |
|---|
| 620 | | - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, |
|---|
| 621 | | - WrEccEn); |
|---|
| 622 | | - fv2 = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, |
|---|
| 623 | | - RdEccEn); |
|---|
| 624 | | - DRM_DEBUG("ecc: " |
|---|
| 625 | | - "reg_val: 0x%08x, " |
|---|
| 626 | | - "WrEccEn: 0x%08x, " |
|---|
| 627 | | - "RdEccEn: 0x%08x\n", |
|---|
| 628 | | - reg_val, field_val, fv2); |
|---|
| 629 | | - |
|---|
| 630 | | - if (!field_val) { |
|---|
| 631 | | - DRM_DEBUG("ecc: WrEccEn is not set\n"); |
|---|
| 632 | | - ++lost_sheep; |
|---|
| 633 | | - } |
|---|
| 634 | | - if (!fv2) { |
|---|
| 635 | | - DRM_DEBUG("ecc: RdEccEn is not set\n"); |
|---|
| 636 | | - ++lost_sheep; |
|---|
| 637 | | - } |
|---|
| 638 | | - } |
|---|
| 639 | | - |
|---|
| 640 | | - DRM_DEBUG("ecc: lost_sheep: %zu\n", lost_sheep); |
|---|
| 641 | | - return lost_sheep == 0; |
|---|
| 642 | | -} |
|---|
| 643 | | - |
|---|
| 644 | 1201 | static int gmc_v9_0_late_init(void *handle) |
|---|
| 645 | 1202 | { |
|---|
| 646 | 1203 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 647 | | - /* |
|---|
| 648 | | - * The latest engine allocation on gfx9 is: |
|---|
| 649 | | - * Engine 0, 1: idle |
|---|
| 650 | | - * Engine 2, 3: firmware |
|---|
| 651 | | - * Engine 4~13: amdgpu ring, subject to change when ring number changes |
|---|
| 652 | | - * Engine 14~15: idle |
|---|
| 653 | | - * Engine 16: kfd tlb invalidation |
|---|
| 654 | | - * Engine 17: Gart flushes |
|---|
| 655 | | - */ |
|---|
| 656 | | - unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 }; |
|---|
| 657 | | - unsigned i; |
|---|
| 658 | 1204 | int r; |
|---|
| 659 | 1205 | |
|---|
| 1206 | + amdgpu_bo_late_init(adev); |
|---|
| 1207 | + |
|---|
| 1208 | + r = amdgpu_gmc_allocate_vm_inv_eng(adev); |
|---|
| 1209 | + if (r) |
|---|
| 1210 | + return r; |
|---|
| 1211 | + |
|---|
| 660 | 1212 | /* |
|---|
| 661 | | - * TODO - Uncomment once GART corruption issue is fixed. |
|---|
| 1213 | + * Work around a performance drop when the VBIOS enables partial |
|---|
| 1214 | + * writes while disabling HBM ECC on vega10. |
|---|
| 662 | 1215 | */ |
|---|
| 663 | | - /* amdgpu_bo_late_init(adev); */ |
|---|
| 664 | | - |
|---|
| 665 | | - for(i = 0; i < adev->num_rings; ++i) { |
|---|
| 666 | | - struct amdgpu_ring *ring = adev->rings[i]; |
|---|
| 667 | | - unsigned vmhub = ring->funcs->vmhub; |
|---|
| 668 | | - |
|---|
| 669 | | - ring->vm_inv_eng = vm_inv_eng[vmhub]++; |
|---|
| 670 | | - dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", |
|---|
| 671 | | - ring->idx, ring->name, ring->vm_inv_eng, |
|---|
| 672 | | - ring->funcs->vmhub); |
|---|
| 673 | | - } |
|---|
| 674 | | - |
|---|
| 675 | | - /* Engine 16 is used for KFD and 17 for GART flushes */ |
|---|
| 676 | | - for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) |
|---|
| 677 | | - BUG_ON(vm_inv_eng[i] > 16); |
|---|
| 678 | | - |
|---|
| 679 | | - if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) { |
|---|
| 680 | | - r = gmc_v9_0_ecc_available(adev); |
|---|
| 681 | | - if (r == 1) { |
|---|
| 682 | | - DRM_INFO("ECC is active.\n"); |
|---|
| 683 | | - } else if (r == 0) { |
|---|
| 684 | | - DRM_INFO("ECC is not present.\n"); |
|---|
| 685 | | - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); |
|---|
| 686 | | - } else { |
|---|
| 687 | | - DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); |
|---|
| 688 | | - return r; |
|---|
| 1216 | + if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) { |
|---|
| 1217 | + if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) { |
|---|
| 1218 | + if (adev->df.funcs->enable_ecc_force_par_wr_rmw) |
|---|
| 1219 | + adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); |
|---|
| 689 | 1220 | } |
|---|
| 690 | 1221 | } |
|---|
| 1222 | + |
|---|
| 1223 | + if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count) |
|---|
| 1224 | + adev->mmhub.funcs->reset_ras_error_count(adev); |
|---|
| 1225 | + |
|---|
| 1226 | + r = amdgpu_gmc_ras_late_init(adev); |
|---|
| 1227 | + if (r) |
|---|
| 1228 | + return r; |
|---|
| 691 | 1229 | |
|---|
| 692 | 1230 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); |
|---|
| 693 | 1231 | } |
|---|
| .. | .. |
|---|
| 696 | 1234 | struct amdgpu_gmc *mc) |
|---|
| 697 | 1235 | { |
|---|
| 698 | 1236 | u64 base = 0; |
|---|
| 1237 | + |
|---|
| 699 | 1238 | if (!amdgpu_sriov_vf(adev)) |
|---|
| 700 | | - base = mmhub_v1_0_get_fb_location(adev); |
|---|
| 701 | | - amdgpu_device_vram_location(adev, &adev->gmc, base); |
|---|
| 702 | | - amdgpu_device_gart_location(adev, mc); |
|---|
| 1239 | + base = adev->mmhub.funcs->get_fb_location(adev); |
|---|
| 1240 | + |
|---|
| 1241 | + /* add the xgmi offset of the physical node */ |
|---|
| 1242 | + base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; |
|---|
| 1243 | + amdgpu_gmc_vram_location(adev, mc, base); |
|---|
| 1244 | + amdgpu_gmc_gart_location(adev, mc); |
|---|
| 1245 | + amdgpu_gmc_agp_location(adev, mc); |
|---|
| 703 | 1246 | /* base offset of vram pages */ |
|---|
| 704 | | - adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); |
|---|
| 1247 | + adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); |
|---|
| 1248 | + |
|---|
| 1249 | + /* XXX: add the xgmi offset of the physical node? */ |
|---|
| 1250 | + adev->vm_manager.vram_base_offset += |
|---|
| 1251 | + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; |
|---|
| 705 | 1252 | } |
|---|
| 706 | 1253 | |
|---|
| 707 | 1254 | /** |
|---|
| .. | .. |
|---|
| 715 | 1262 | */ |
|---|
| 716 | 1263 | static int gmc_v9_0_mc_init(struct amdgpu_device *adev) |
|---|
| 717 | 1264 | { |
|---|
| 718 | | - int chansize, numchan; |
|---|
| 719 | 1265 | int r; |
|---|
| 720 | | - |
|---|
| 721 | | - if (amdgpu_emu_mode != 1) |
|---|
| 722 | | - adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); |
|---|
| 723 | | - if (!adev->gmc.vram_width) { |
|---|
| 724 | | - /* hbm memory channel size */ |
|---|
| 725 | | - if (adev->flags & AMD_IS_APU) |
|---|
| 726 | | - chansize = 64; |
|---|
| 727 | | - else |
|---|
| 728 | | - chansize = 128; |
|---|
| 729 | | - |
|---|
| 730 | | - numchan = adev->df_funcs->get_hbm_channel_number(adev); |
|---|
| 731 | | - adev->gmc.vram_width = numchan * chansize; |
|---|
| 732 | | - } |
|---|
| 733 | 1266 | |
|---|
| 734 | 1267 | /* size in MB on si */ |
|---|
| 735 | 1268 | adev->gmc.mc_vram_size = |
|---|
| 736 | | - adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; |
|---|
| 1269 | + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; |
|---|
| 737 | 1270 | adev->gmc.real_vram_size = adev->gmc.mc_vram_size; |
|---|
| 738 | 1271 | |
|---|
| 739 | 1272 | if (!(adev->flags & AMD_IS_APU)) { |
|---|
| .. | .. |
|---|
| 746 | 1279 | |
|---|
| 747 | 1280 | #ifdef CONFIG_X86_64 |
|---|
| 748 | 1281 | if (adev->flags & AMD_IS_APU) { |
|---|
| 749 | | - adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev); |
|---|
| 1282 | + adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); |
|---|
| 750 | 1283 | adev->gmc.aper_size = adev->gmc.real_vram_size; |
|---|
| 751 | 1284 | } |
|---|
| 752 | 1285 | #endif |
|---|
| .. | .. |
|---|
| 761 | 1294 | case CHIP_VEGA10: /* all engines support GPUVM */ |
|---|
| 762 | 1295 | case CHIP_VEGA12: /* all engines support GPUVM */ |
|---|
| 763 | 1296 | case CHIP_VEGA20: |
|---|
| 1297 | + case CHIP_ARCTURUS: |
|---|
| 764 | 1298 | default: |
|---|
| 765 | 1299 | adev->gmc.gart_size = 512ULL << 20; |
|---|
| 766 | 1300 | break; |
|---|
| 767 | 1301 | case CHIP_RAVEN: /* DCE SG support */ |
|---|
| 1302 | + case CHIP_RENOIR: |
|---|
| 768 | 1303 | adev->gmc.gart_size = 1024ULL << 20; |
|---|
| 769 | 1304 | break; |
|---|
| 770 | 1305 | } |
|---|
| .. | .. |
|---|
| 781 | 1316 | { |
|---|
| 782 | 1317 | int r; |
|---|
| 783 | 1318 | |
|---|
| 784 | | - if (adev->gart.robj) { |
|---|
| 1319 | + if (adev->gart.bo) { |
|---|
| 785 | 1320 | WARN(1, "VEGA10 PCIE GART already initialized\n"); |
|---|
| 786 | 1321 | return 0; |
|---|
| 787 | 1322 | } |
|---|
| .. | .. |
|---|
| 790 | 1325 | if (r) |
|---|
| 791 | 1326 | return r; |
|---|
| 792 | 1327 | adev->gart.table_size = adev->gart.num_gpu_pages * 8; |
|---|
| 793 | | - adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE(MTYPE_UC) | |
|---|
| 1328 | + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) | |
|---|
| 794 | 1329 | AMDGPU_PTE_EXECUTABLE; |
|---|
| 795 | 1330 | return amdgpu_gart_table_vram_alloc(adev); |
|---|
| 796 | 1331 | } |
|---|
| 797 | 1332 | |
|---|
| 798 | | -static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) |
|---|
| 1333 | +/** |
|---|
| 1334 | + * gmc_v9_0_save_registers - saves regs |
|---|
| 1335 | + * |
|---|
| 1336 | + * @adev: amdgpu_device pointer |
|---|
| 1337 | + * |
|---|
| 1338 | + * This saves potential register values that should be |
|---|
| 1339 | + * restored upon resume |
|---|
| 1340 | + */ |
|---|
| 1341 | +static void gmc_v9_0_save_registers(struct amdgpu_device *adev) |
|---|
| 799 | 1342 | { |
|---|
| 800 | | -#if 0 |
|---|
| 801 | | - u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); |
|---|
| 802 | | -#endif |
|---|
| 803 | | - unsigned size; |
|---|
| 804 | | - |
|---|
| 805 | | - /* |
|---|
| 806 | | - * TODO Remove once GART corruption is resolved |
|---|
| 807 | | - * Check related code in gmc_v9_0_sw_fini |
|---|
| 808 | | - * */ |
|---|
| 809 | | - size = 9 * 1024 * 1024; |
|---|
| 810 | | - |
|---|
| 811 | | -#if 0 |
|---|
| 812 | | - if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { |
|---|
| 813 | | - size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ |
|---|
| 814 | | - } else { |
|---|
| 815 | | - u32 viewport; |
|---|
| 816 | | - |
|---|
| 817 | | - switch (adev->asic_type) { |
|---|
| 818 | | - case CHIP_RAVEN: |
|---|
| 819 | | - viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); |
|---|
| 820 | | - size = (REG_GET_FIELD(viewport, |
|---|
| 821 | | - HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * |
|---|
| 822 | | - REG_GET_FIELD(viewport, |
|---|
| 823 | | - HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * |
|---|
| 824 | | - 4); |
|---|
| 825 | | - break; |
|---|
| 826 | | - case CHIP_VEGA10: |
|---|
| 827 | | - case CHIP_VEGA12: |
|---|
| 828 | | - default: |
|---|
| 829 | | - viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); |
|---|
| 830 | | - size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * |
|---|
| 831 | | - REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) * |
|---|
| 832 | | - 4); |
|---|
| 833 | | - break; |
|---|
| 834 | | - } |
|---|
| 835 | | - } |
|---|
| 836 | | - /* return 0 if the pre-OS buffer uses up most of vram */ |
|---|
| 837 | | - if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) |
|---|
| 838 | | - return 0; |
|---|
| 839 | | - |
|---|
| 840 | | -#endif |
|---|
| 841 | | - return size; |
|---|
| 1343 | + if (adev->asic_type == CHIP_RAVEN) |
|---|
| 1344 | + adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); |
|---|
| 842 | 1345 | } |
|---|
| 843 | 1346 | |
|---|
| 844 | 1347 | static int gmc_v9_0_sw_init(void *handle) |
|---|
| 845 | 1348 | { |
|---|
| 846 | | - int r; |
|---|
| 847 | | - int dma_bits; |
|---|
| 1349 | + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; |
|---|
| 848 | 1350 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 849 | 1351 | |
|---|
| 850 | | - gfxhub_v1_0_init(adev); |
|---|
| 851 | | - mmhub_v1_0_init(adev); |
|---|
| 1352 | + adev->gfxhub.funcs->init(adev); |
|---|
| 1353 | + |
|---|
| 1354 | + adev->mmhub.funcs->init(adev); |
|---|
| 852 | 1355 | |
|---|
| 853 | 1356 | spin_lock_init(&adev->gmc.invalidate_lock); |
|---|
| 854 | 1357 | |
|---|
| 855 | | - adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); |
|---|
| 1358 | + r = amdgpu_atomfirmware_get_vram_info(adev, |
|---|
| 1359 | + &vram_width, &vram_type, &vram_vendor); |
|---|
| 1360 | + if (amdgpu_sriov_vf(adev)) |
|---|
| 1361 | + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, |
|---|
| 1362 | + * and DF related registers are not readable, so hardcoding seems to be the |
|---|
| 1363 | + * only way to set the correct vram_width |
|---|
| 1364 | + */ |
|---|
| 1365 | + adev->gmc.vram_width = 2048; |
|---|
| 1366 | + else if (amdgpu_emu_mode != 1) |
|---|
| 1367 | + adev->gmc.vram_width = vram_width; |
|---|
| 1368 | + |
|---|
| 1369 | + if (!adev->gmc.vram_width) { |
|---|
| 1370 | + int chansize, numchan; |
|---|
| 1371 | + |
|---|
| 1372 | + /* hbm memory channel size */ |
|---|
| 1373 | + if (adev->flags & AMD_IS_APU) |
|---|
| 1374 | + chansize = 64; |
|---|
| 1375 | + else |
|---|
| 1376 | + chansize = 128; |
|---|
| 1377 | + |
|---|
| 1378 | + numchan = adev->df.funcs->get_hbm_channel_number(adev); |
|---|
| 1379 | + adev->gmc.vram_width = numchan * chansize; |
|---|
| 1380 | + } |
|---|
| 1381 | + |
|---|
| 1382 | + adev->gmc.vram_type = vram_type; |
|---|
| 1383 | + adev->gmc.vram_vendor = vram_vendor; |
|---|
| 856 | 1384 | switch (adev->asic_type) { |
|---|
| 857 | 1385 | case CHIP_RAVEN: |
|---|
| 1386 | + adev->num_vmhubs = 2; |
|---|
| 1387 | + |
|---|
| 858 | 1388 | if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { |
|---|
| 859 | 1389 | amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); |
|---|
| 860 | 1390 | } else { |
|---|
| .. | .. |
|---|
| 867 | 1397 | case CHIP_VEGA10: |
|---|
| 868 | 1398 | case CHIP_VEGA12: |
|---|
| 869 | 1399 | case CHIP_VEGA20: |
|---|
| 1400 | + case CHIP_RENOIR: |
|---|
| 1401 | + adev->num_vmhubs = 2; |
|---|
| 1402 | + |
|---|
| 1403 | + |
|---|
| 870 | 1404 | /* |
|---|
| 871 | 1405 | * To fulfill 4-level page support, |
|---|
| 872 | 1406 | * vm size is 256TB (48bit), maximum size of Vega10, |
|---|
| 873 | 1407 | * block size 512 (9bit) |
|---|
| 874 | 1408 | */ |
|---|
| 1409 | + /* SR-IOV restricts max_pfn to below AMDGPU_GMC_HOLE */ |
|---|
| 1410 | + if (amdgpu_sriov_vf(adev)) |
|---|
| 1411 | + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); |
|---|
| 1412 | + else |
|---|
| 1413 | + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); |
|---|
| 1414 | + break; |
|---|
| 1415 | + case CHIP_ARCTURUS: |
|---|
| 1416 | + adev->num_vmhubs = 3; |
|---|
| 1417 | + |
|---|
| 1418 | + /* Keep the vm size same with Vega20 */ |
|---|
| 875 | 1419 | amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); |
|---|
| 876 | 1420 | break; |
|---|
| 877 | 1421 | default: |
|---|
| .. | .. |
|---|
| 881 | 1425 | /* This interrupt is VMC page fault.*/ |
|---|
| 882 | 1426 | r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, |
|---|
| 883 | 1427 | &adev->gmc.vm_fault); |
|---|
| 1428 | + if (r) |
|---|
| 1429 | + return r; |
|---|
| 1430 | + |
|---|
| 1431 | + if (adev->asic_type == CHIP_ARCTURUS) { |
|---|
| 1432 | + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, |
|---|
| 1433 | + &adev->gmc.vm_fault); |
|---|
| 1434 | + if (r) |
|---|
| 1435 | + return r; |
|---|
| 1436 | + } |
|---|
| 1437 | + |
|---|
| 884 | 1438 | r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, |
|---|
| 885 | 1439 | &adev->gmc.vm_fault); |
|---|
| 886 | 1440 | |
|---|
| 887 | 1441 | if (r) |
|---|
| 888 | 1442 | return r; |
|---|
| 1443 | + |
|---|
| 1444 | + if (!amdgpu_sriov_vf(adev)) { |
|---|
| 1445 | + /* interrupt sent to DF. */ |
|---|
| 1446 | + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, |
|---|
| 1447 | + &adev->gmc.ecc_irq); |
|---|
| 1448 | + if (r) |
|---|
| 1449 | + return r; |
|---|
| 1450 | + } |
|---|
| 889 | 1451 | |
|---|
| 890 | 1452 | /* Set the internal MC address mask |
|---|
| 891 | 1453 | * This is the max address of the GPU's |
|---|
| .. | .. |
|---|
| 893 | 1455 | */ |
|---|
| 894 | 1456 | adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ |
|---|
| 895 | 1457 | |
|---|
| 896 | | - /* set DMA mask + need_dma32 flags. |
|---|
| 897 | | - * PCIE - can handle 44-bits. |
|---|
| 898 | | - * IGP - can handle 44-bits |
|---|
| 899 | | - * PCI - dma32 for legacy pci gart, 44 bits on vega10 |
|---|
| 900 | | - */ |
|---|
| 901 | | - adev->need_dma32 = false; |
|---|
| 902 | | - dma_bits = adev->need_dma32 ? 32 : 44; |
|---|
| 903 | | - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); |
|---|
| 1458 | + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); |
|---|
| 904 | 1459 | if (r) { |
|---|
| 905 | | - adev->need_dma32 = true; |
|---|
| 906 | | - dma_bits = 32; |
|---|
| 907 | 1460 | printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); |
|---|
| 1461 | + return r; |
|---|
| 908 | 1462 | } |
|---|
| 909 | | - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); |
|---|
| 910 | | - if (r) { |
|---|
| 911 | | - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); |
|---|
| 912 | | - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); |
|---|
| 1463 | + adev->need_swiotlb = drm_need_swiotlb(44); |
|---|
| 1464 | + |
|---|
| 1465 | + if (adev->gmc.xgmi.supported) { |
|---|
| 1466 | + r = adev->gfxhub.funcs->get_xgmi_info(adev); |
|---|
| 1467 | + if (r) |
|---|
| 1468 | + return r; |
|---|
| 913 | 1469 | } |
|---|
| 914 | | - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); |
|---|
| 915 | 1470 | |
|---|
| 916 | 1471 | r = gmc_v9_0_mc_init(adev); |
|---|
| 917 | 1472 | if (r) |
|---|
| 918 | 1473 | return r; |
|---|
| 919 | 1474 | |
|---|
| 920 | | - adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev); |
|---|
| 1475 | + amdgpu_gmc_get_vbios_allocations(adev); |
|---|
| 921 | 1476 | |
|---|
| 922 | 1477 | /* Memory manager */ |
|---|
| 923 | 1478 | r = amdgpu_bo_init(adev); |
|---|
| .. | .. |
|---|
| 931 | 1486 | /* |
|---|
| 932 | 1487 | * number of VMs |
|---|
| 933 | 1488 | * VMID 0 is reserved for System |
|---|
| 934 | | - * amdgpu graphics/compute will use VMIDs 1-7 |
|---|
| 935 | | - * amdkfd will use VMIDs 8-15 |
|---|
| 1489 | + * amdgpu graphics/compute will use VMIDs 1..n-1 |
|---|
| 1490 | + * amdkfd will use VMIDs n..15 |
|---|
| 1491 | + * |
|---|
| 1492 | + * The first KFD VMID is 8 for GPUs with graphics, 3 for |
|---|
| 1493 | + * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs |
|---|
| 1494 | + * for video processing. |
|---|
| 936 | 1495 | */ |
|---|
| 937 | | - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; |
|---|
| 938 | | - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; |
|---|
| 1496 | + adev->vm_manager.first_kfd_vmid = |
|---|
| 1497 | + adev->asic_type == CHIP_ARCTURUS ? 3 : 8; |
|---|
| 939 | 1498 | |
|---|
| 940 | 1499 | amdgpu_vm_manager_init(adev); |
|---|
| 1500 | + |
|---|
| 1501 | + gmc_v9_0_save_registers(adev); |
|---|
| 941 | 1502 | |
|---|
| 942 | 1503 | return 0; |
|---|
| 943 | 1504 | } |
|---|
| .. | .. |
|---|
| 946 | 1507 | { |
|---|
| 947 | 1508 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 948 | 1509 | |
|---|
| 1510 | + amdgpu_gmc_ras_fini(adev); |
|---|
| 949 | 1511 | amdgpu_gem_force_release(adev); |
|---|
| 950 | 1512 | amdgpu_vm_manager_fini(adev); |
|---|
| 951 | | - |
|---|
| 952 | | - /* |
|---|
| 953 | | - * TODO: |
|---|
| 954 | | - * Currently there is a bug where some memory client outside |
|---|
| 955 | | - * of the driver writes to first 8M of VRAM on S3 resume, |
|---|
| 956 | | - * this overrides GART which by default gets placed in first 8M and |
|---|
| 957 | | - * causes VM_FAULTS once GTT is accessed. |
|---|
| 958 | | - * Keep the stolen memory reservation until the while this is not solved. |
|---|
| 959 | | - * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init |
|---|
| 960 | | - */ |
|---|
| 961 | | - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); |
|---|
| 962 | | - |
|---|
| 963 | 1513 | amdgpu_gart_table_vram_free(adev); |
|---|
| 964 | 1514 | amdgpu_bo_fini(adev); |
|---|
| 965 | 1515 | amdgpu_gart_fini(adev); |
|---|
| .. | .. |
|---|
| 972 | 1522 | |
|---|
| 973 | 1523 | switch (adev->asic_type) { |
|---|
| 974 | 1524 | case CHIP_VEGA10: |
|---|
| 1525 | + if (amdgpu_sriov_vf(adev)) |
|---|
| 1526 | + break; |
|---|
| 1527 | + fallthrough; |
|---|
| 975 | 1528 | case CHIP_VEGA20: |
|---|
| 976 | 1529 | soc15_program_register_sequence(adev, |
|---|
| 977 | 1530 | golden_settings_mmhub_1_0_0, |
|---|
| .. | .. |
|---|
| 983 | 1536 | case CHIP_VEGA12: |
|---|
| 984 | 1537 | break; |
|---|
| 985 | 1538 | case CHIP_RAVEN: |
|---|
| 1539 | + /* TODO for renoir */ |
|---|
| 986 | 1540 | soc15_program_register_sequence(adev, |
|---|
| 987 | 1541 | golden_settings_athub_1_0_0, |
|---|
| 988 | 1542 | ARRAY_SIZE(golden_settings_athub_1_0_0)); |
|---|
| .. | .. |
|---|
| 999 | 1553 | * |
|---|
| 1000 | 1554 | * This restores register values, saved at suspend. |
|---|
| 1001 | 1555 | */ |
|---|
| 1002 | | -static void gmc_v9_0_restore_registers(struct amdgpu_device *adev) |
|---|
| 1556 | +void gmc_v9_0_restore_registers(struct amdgpu_device *adev) |
|---|
| 1003 | 1557 | { |
|---|
| 1004 | | - if (adev->asic_type == CHIP_RAVEN) |
|---|
| 1005 | | - WREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register); |
|---|
| 1558 | + if (adev->asic_type == CHIP_RAVEN) { |
|---|
| 1559 | + WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register); |
|---|
| 1560 | + WARN_ON(adev->gmc.sdpif_register != |
|---|
| 1561 | + RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0)); |
|---|
| 1562 | + } |
|---|
| 1006 | 1563 | } |
|---|
| 1007 | 1564 | |
|---|
| 1008 | 1565 | /** |
|---|
| .. | .. |
|---|
| 1013 | 1570 | static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) |
|---|
| 1014 | 1571 | { |
|---|
| 1015 | 1572 | int r; |
|---|
| 1016 | | - bool value; |
|---|
| 1017 | | - u32 tmp; |
|---|
| 1018 | 1573 | |
|---|
| 1019 | | - amdgpu_device_program_register_sequence(adev, |
|---|
| 1020 | | - golden_settings_vega10_hdp, |
|---|
| 1021 | | - ARRAY_SIZE(golden_settings_vega10_hdp)); |
|---|
| 1022 | | - |
|---|
| 1023 | | - if (adev->gart.robj == NULL) { |
|---|
| 1574 | + if (adev->gart.bo == NULL) { |
|---|
| 1024 | 1575 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); |
|---|
| 1025 | 1576 | return -EINVAL; |
|---|
| 1026 | 1577 | } |
|---|
| .. | .. |
|---|
| 1028 | 1579 | if (r) |
|---|
| 1029 | 1580 | return r; |
|---|
| 1030 | 1581 | |
|---|
| 1582 | + r = adev->gfxhub.funcs->gart_enable(adev); |
|---|
| 1583 | + if (r) |
|---|
| 1584 | + return r; |
|---|
| 1585 | + |
|---|
| 1586 | + r = adev->mmhub.funcs->gart_enable(adev); |
|---|
| 1587 | + if (r) |
|---|
| 1588 | + return r; |
|---|
| 1589 | + |
|---|
| 1590 | + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", |
|---|
| 1591 | + (unsigned)(adev->gmc.gart_size >> 20), |
|---|
| 1592 | + (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); |
|---|
| 1593 | + adev->gart.ready = true; |
|---|
| 1594 | + return 0; |
|---|
| 1595 | +} |
|---|
| 1596 | + |
|---|
| 1597 | +static int gmc_v9_0_hw_init(void *handle) |
|---|
| 1598 | +{ |
|---|
| 1599 | + struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 1600 | + bool value; |
|---|
| 1601 | + int r, i; |
|---|
| 1602 | + u32 tmp; |
|---|
| 1603 | + |
|---|
| 1604 | + /* The sequence of these two function calls matters.*/ |
|---|
| 1605 | + gmc_v9_0_init_golden_registers(adev); |
|---|
| 1606 | + |
|---|
| 1607 | + if (adev->mode_info.num_crtc) { |
|---|
| 1608 | + if (adev->asic_type != CHIP_ARCTURUS) { |
|---|
| 1609 | + /* Lockout access through VGA aperture*/ |
|---|
| 1610 | + WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); |
|---|
| 1611 | + |
|---|
| 1612 | + /* disable VGA render */ |
|---|
| 1613 | + WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); |
|---|
| 1614 | + } |
|---|
| 1615 | + } |
|---|
| 1616 | + |
|---|
| 1617 | + amdgpu_device_program_register_sequence(adev, |
|---|
| 1618 | + golden_settings_vega10_hdp, |
|---|
| 1619 | + ARRAY_SIZE(golden_settings_vega10_hdp)); |
|---|
| 1620 | + |
|---|
| 1621 | + if (adev->mmhub.funcs->update_power_gating) |
|---|
| 1622 | + adev->mmhub.funcs->update_power_gating(adev, true); |
|---|
| 1623 | + |
|---|
| 1031 | 1624 | switch (adev->asic_type) { |
|---|
| 1032 | | - case CHIP_RAVEN: |
|---|
| 1033 | | - mmhub_v1_0_initialize_power_gating(adev); |
|---|
| 1034 | | - mmhub_v1_0_update_power_gating(adev, true); |
|---|
| 1625 | + case CHIP_ARCTURUS: |
|---|
| 1626 | + WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1); |
|---|
| 1035 | 1627 | break; |
|---|
| 1036 | 1628 | default: |
|---|
| 1037 | 1629 | break; |
|---|
| 1038 | 1630 | } |
|---|
| 1039 | | - |
|---|
| 1040 | | - r = gfxhub_v1_0_gart_enable(adev); |
|---|
| 1041 | | - if (r) |
|---|
| 1042 | | - return r; |
|---|
| 1043 | | - |
|---|
| 1044 | | - r = mmhub_v1_0_gart_enable(adev); |
|---|
| 1045 | | - if (r) |
|---|
| 1046 | | - return r; |
|---|
| 1047 | 1631 | |
|---|
| 1048 | 1632 | WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); |
|---|
| 1049 | 1633 | |
|---|
| .. | .. |
|---|
| 1054 | 1638 | WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); |
|---|
| 1055 | 1639 | |
|---|
| 1056 | 1640 | /* After HDP is initialized, flush HDP.*/ |
|---|
| 1057 | | - adev->nbio_funcs->hdp_flush(adev, NULL); |
|---|
| 1641 | + adev->nbio.funcs->hdp_flush(adev, NULL); |
|---|
| 1058 | 1642 | |
|---|
| 1059 | 1643 | if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) |
|---|
| 1060 | 1644 | value = false; |
|---|
| 1061 | 1645 | else |
|---|
| 1062 | 1646 | value = true; |
|---|
| 1063 | 1647 | |
|---|
| 1064 | | - gfxhub_v1_0_set_fault_enable_default(adev, value); |
|---|
| 1065 | | - mmhub_v1_0_set_fault_enable_default(adev, value); |
|---|
| 1066 | | - gmc_v9_0_flush_gpu_tlb(adev, 0); |
|---|
| 1067 | | - |
|---|
| 1068 | | - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", |
|---|
| 1069 | | - (unsigned)(adev->gmc.gart_size >> 20), |
|---|
| 1070 | | - (unsigned long long)adev->gart.table_addr); |
|---|
| 1071 | | - adev->gart.ready = true; |
|---|
| 1072 | | - return 0; |
|---|
| 1073 | | -} |
|---|
| 1074 | | - |
|---|
| 1075 | | -static int gmc_v9_0_hw_init(void *handle) |
|---|
| 1076 | | -{ |
|---|
| 1077 | | - int r; |
|---|
| 1078 | | - struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 1079 | | - |
|---|
| 1080 | | - /* The sequence of these two function calls matters.*/ |
|---|
| 1081 | | - gmc_v9_0_init_golden_registers(adev); |
|---|
| 1082 | | - |
|---|
| 1083 | | - if (adev->mode_info.num_crtc) { |
|---|
| 1084 | | - /* Lockout access through VGA aperture*/ |
|---|
| 1085 | | - WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); |
|---|
| 1086 | | - |
|---|
| 1087 | | - /* disable VGA render */ |
|---|
| 1088 | | - WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); |
|---|
| 1648 | + if (!amdgpu_sriov_vf(adev)) { |
|---|
| 1649 | + adev->gfxhub.funcs->set_fault_enable_default(adev, value); |
|---|
| 1650 | + adev->mmhub.funcs->set_fault_enable_default(adev, value); |
|---|
| 1089 | 1651 | } |
|---|
| 1652 | + for (i = 0; i < adev->num_vmhubs; ++i) |
|---|
| 1653 | + gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); |
|---|
| 1654 | + |
|---|
| 1655 | + if (adev->umc.funcs && adev->umc.funcs->init_registers) |
|---|
| 1656 | + adev->umc.funcs->init_registers(adev); |
|---|
| 1090 | 1657 | |
|---|
| 1091 | 1658 | r = gmc_v9_0_gart_enable(adev); |
|---|
| 1092 | 1659 | |
|---|
| 1093 | 1660 | return r; |
|---|
| 1094 | | -} |
|---|
| 1095 | | - |
|---|
| 1096 | | -/** |
|---|
| 1097 | | - * gmc_v9_0_save_registers - saves regs |
|---|
| 1098 | | - * |
|---|
| 1099 | | - * @adev: amdgpu_device pointer |
|---|
| 1100 | | - * |
|---|
| 1101 | | - * This saves potential register values that should be |
|---|
| 1102 | | - * restored upon resume |
|---|
| 1103 | | - */ |
|---|
| 1104 | | -static void gmc_v9_0_save_registers(struct amdgpu_device *adev) |
|---|
| 1105 | | -{ |
|---|
| 1106 | | - if (adev->asic_type == CHIP_RAVEN) |
|---|
| 1107 | | - adev->gmc.sdpif_register = RREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); |
|---|
| 1108 | 1661 | } |
|---|
| 1109 | 1662 | |
|---|
| 1110 | 1663 | /** |
|---|
| .. | .. |
|---|
| 1116 | 1669 | */ |
|---|
| 1117 | 1670 | static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) |
|---|
| 1118 | 1671 | { |
|---|
| 1119 | | - gfxhub_v1_0_gart_disable(adev); |
|---|
| 1120 | | - mmhub_v1_0_gart_disable(adev); |
|---|
| 1672 | + adev->gfxhub.funcs->gart_disable(adev); |
|---|
| 1673 | + adev->mmhub.funcs->gart_disable(adev); |
|---|
| 1121 | 1674 | amdgpu_gart_table_vram_unpin(adev); |
|---|
| 1122 | 1675 | } |
|---|
| 1123 | 1676 | |
|---|
| 1124 | 1677 | static int gmc_v9_0_hw_fini(void *handle) |
|---|
| 1125 | 1678 | { |
|---|
| 1126 | 1679 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 1680 | + |
|---|
| 1681 | + gmc_v9_0_gart_disable(adev); |
|---|
| 1127 | 1682 | |
|---|
| 1128 | 1683 | if (amdgpu_sriov_vf(adev)) { |
|---|
| 1129 | 1684 | /* full access mode, so don't touch any GMC register */ |
|---|
| .. | .. |
|---|
| 1132 | 1687 | } |
|---|
| 1133 | 1688 | |
|---|
| 1134 | 1689 | amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); |
|---|
| 1135 | | - gmc_v9_0_gart_disable(adev); |
|---|
| 1136 | 1690 | |
|---|
| 1137 | 1691 | return 0; |
|---|
| 1138 | 1692 | } |
|---|
| 1139 | 1693 | |
|---|
| 1140 | 1694 | static int gmc_v9_0_suspend(void *handle) |
|---|
| 1141 | 1695 | { |
|---|
| 1142 | | - int r; |
|---|
| 1143 | 1696 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 1144 | 1697 | |
|---|
| 1145 | | - r = gmc_v9_0_hw_fini(adev); |
|---|
| 1146 | | - if (r) |
|---|
| 1147 | | - return r; |
|---|
| 1148 | | - |
|---|
| 1149 | | - gmc_v9_0_save_registers(adev); |
|---|
| 1150 | | - |
|---|
| 1151 | | - return 0; |
|---|
| 1698 | + return gmc_v9_0_hw_fini(adev); |
|---|
| 1152 | 1699 | } |
|---|
| 1153 | 1700 | |
|---|
| 1154 | 1701 | static int gmc_v9_0_resume(void *handle) |
|---|
| .. | .. |
|---|
| 1156 | 1703 | int r; |
|---|
| 1157 | 1704 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 1158 | 1705 | |
|---|
| 1159 | | - gmc_v9_0_restore_registers(adev); |
|---|
| 1160 | 1706 | r = gmc_v9_0_hw_init(adev); |
|---|
| 1161 | 1707 | if (r) |
|---|
| 1162 | 1708 | return r; |
|---|
| .. | .. |
|---|
| 1189 | 1735 | { |
|---|
| 1190 | 1736 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 1191 | 1737 | |
|---|
| 1192 | | - return mmhub_v1_0_set_clockgating(adev, state); |
|---|
| 1738 | + adev->mmhub.funcs->set_clockgating(adev, state); |
|---|
| 1739 | + |
|---|
| 1740 | + athub_v1_0_set_clockgating(adev, state); |
|---|
| 1741 | + |
|---|
| 1742 | + return 0; |
|---|
| 1193 | 1743 | } |
|---|
| 1194 | 1744 | |
|---|
| 1195 | 1745 | static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) |
|---|
| 1196 | 1746 | { |
|---|
| 1197 | 1747 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 1198 | 1748 | |
|---|
| 1199 | | - mmhub_v1_0_get_clockgating(adev, flags); |
|---|
| 1749 | + adev->mmhub.funcs->get_clockgating(adev, flags); |
|---|
| 1750 | + |
|---|
| 1751 | + athub_v1_0_get_clockgating(adev, flags); |
|---|
| 1200 | 1752 | } |
|---|
| 1201 | 1753 | |
|---|
| 1202 | 1754 | static int gmc_v9_0_set_powergating_state(void *handle, |
|---|