| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Broadcom STB SoCs Bus Unit Interface controls |
|---|
| 3 | 4 | * |
|---|
| 4 | 5 | * Copyright (C) 2015, Broadcom Corporation |
|---|
| 5 | | - * |
|---|
| 6 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 7 | | - * it under the terms of the GNU General Public License version 2 as |
|---|
| 8 | | - * published by the Free Software Foundation. |
|---|
| 9 | | - * |
|---|
| 10 | | - * This program is distributed in the hope that it will be useful, |
|---|
| 11 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 13 | | - * GNU General Public License for more details. |
|---|
| 14 | 6 | */ |
|---|
| 15 | 7 | |
|---|
| 16 | 8 | #define pr_fmt(fmt) "brcmstb: " KBUILD_MODNAME ": " fmt |
|---|
| .. | .. |
|---|
| 20 | 12 | #include <linux/of_address.h> |
|---|
| 21 | 13 | #include <linux/syscore_ops.h> |
|---|
| 22 | 14 | #include <linux/soc/brcmstb/brcmstb.h> |
|---|
| 15 | + |
|---|
| 16 | +#define RACENPREF_MASK 0x3 /* 2-bit field value; shifted into place with the *_SHIFT constants below */ |
|---|
| 17 | +#define RACPREFINST_SHIFT 0 /* bit offsets of the fields inside one CPU's group of RAC_CPU_SHIFT bits */ |
|---|
| 18 | +#define RACENINST_SHIFT 2 |
|---|
| 19 | +#define RACPREFDATA_SHIFT 4 |
|---|
| 20 | +#define RACENDATA_SHIFT 6 |
|---|
| 21 | +#define RAC_CPU_SHIFT 8 /* distance in bits between consecutive CPUs' groups in the RAC_CONFIG0_REG value */ |
|---|
| 22 | +#define RACCFG_MASK 0xff |
|---|
| 23 | +#define DPREF_LINE_2_SHIFT 24 /* CPU n sets bit (n + 24) in the RAC_CONFIG1_REG value */ |
|---|
| 24 | +#define DPREF_LINE_2_MASK 0xff |
|---|
| 25 | + |
|---|
| 26 | +/* Bitmask to enable instruction and data prefetching with a 256-byte stride */ |
|---|
| 27 | +#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \ |
|---|
| 28 | + RACENPREF_MASK << RACENINST_SHIFT | \ |
|---|
| 29 | + 1 << RACPREFDATA_SHIFT | \ |
|---|
| 30 | + RACENPREF_MASK << RACENDATA_SHIFT) |
|---|
| 23 | 31 | |
|---|
| 24 | 32 | #define CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK 0x70000000 |
|---|
| 25 | 33 | #define CPU_CREDIT_REG_MCPx_READ_CRED_MASK 0xf |
|---|
| .. | .. |
|---|
| 39 | 47 | static bool mcp_wr_pairing_en; |
|---|
| 40 | 48 | static const int *cpubiuctrl_regs; |
|---|
| 41 | 49 | |
|---|
| 50 | +enum cpubiuctrl_regs { /* Logical register indices; each per-CPU-type table maps an index to a byte offset */ |
|---|
| 51 | + CPU_CREDIT_REG = 0, |
|---|
| 52 | + CPU_MCP_FLOW_REG, |
|---|
| 53 | + CPU_WRITEBACK_CTRL_REG, |
|---|
| 54 | + RAC_CONFIG0_REG, /* indices from here up are refused by the register accessors when CONFIG_CACHE_B15_RAC is enabled */ |
|---|
| 55 | + RAC_CONFIG1_REG, |
|---|
| 56 | + NUM_CPU_BIUCTRL_REGS, /* count of the indices above; only correct while it stays last */ |
|---|
| 57 | +}; |
|---|
| 58 | + |
|---|
| 42 | 59 | static inline u32 cbc_readl(int reg) |
|---|
| 43 | 60 | { |
|---|
| 44 | 61 | int offset = cpubiuctrl_regs[reg]; |
|---|
| 45 | 62 | |
|---|
| 46 | | - if (offset == -1) |
|---|
| 63 | + if (offset == -1 || |
|---|
| 64 | + (IS_ENABLED(CONFIG_CACHE_B15_RAC) && reg >= RAC_CONFIG0_REG)) |
|---|
| 47 | 65 | return (u32)-1; |
|---|
| 48 | 66 | |
|---|
| 49 | 67 | return readl_relaxed(cpubiuctrl_base + offset); |
|---|
| .. | .. |
|---|
| 53 | 71 | { |
|---|
| 54 | 72 | int offset = cpubiuctrl_regs[reg]; |
|---|
| 55 | 73 | |
|---|
| 56 | | - if (offset == -1) |
|---|
| 74 | + if (offset == -1 || |
|---|
| 75 | + (IS_ENABLED(CONFIG_CACHE_B15_RAC) && reg >= RAC_CONFIG0_REG)) |
|---|
| 57 | 76 | return; |
|---|
| 58 | 77 | |
|---|
| 59 | 78 | writel(val, cpubiuctrl_base + offset); |
|---|
| 60 | 79 | } |
|---|
| 61 | 80 | |
|---|
| 62 | | -enum cpubiuctrl_regs { |
|---|
| 63 | | - CPU_CREDIT_REG = 0, |
|---|
| 64 | | - CPU_MCP_FLOW_REG, |
|---|
| 65 | | - CPU_WRITEBACK_CTRL_REG |
|---|
| 66 | | -}; |
|---|
| 67 | | - |
|---|
| 68 | 81 | static const int b15_cpubiuctrl_regs[] = { /* Offset table, one of the candidates installed as cpubiuctrl_regs by setup_hifcpubiuctrl_regs() */ |
|---|
| 69 | 82 | [CPU_CREDIT_REG] = 0x184, |
|---|
| 70 | 83 | [CPU_MCP_FLOW_REG] = -1, /* -1 means no such register: cbc_readl() returns (u32)-1 and the write helper returns without writing */ |
|---|
| 71 | 84 | [CPU_WRITEBACK_CTRL_REG] = -1, |
|---|
| 85 | + [RAC_CONFIG0_REG] = -1, |
|---|
| 86 | + [RAC_CONFIG1_REG] = -1, |
|---|
| 72 | 87 | }; |
|---|
| 73 | 88 | |
|---|
| 74 | | -/* Odd cases, e.g: 7260 */ |
|---|
| 89 | +/* Odd cases, e.g: 7260A0 */ |
|---|
| 75 | 90 | static const int b53_cpubiuctrl_no_wb_regs[] = { /* Selected in setup_hifcpubiuctrl_regs() when the family ID is 0x7260 and the revision is 0 */ |
|---|
| 76 | 91 | [CPU_CREDIT_REG] = 0x0b0, |
|---|
| 77 | 92 | [CPU_MCP_FLOW_REG] = 0x0b4, |
|---|
| 78 | 93 | [CPU_WRITEBACK_CTRL_REG] = -1, /* the only entry that differs from b53_cpubiuctrl_regs: accesses are skipped */ |
|---|
| 94 | + [RAC_CONFIG0_REG] = 0x78, |
|---|
| 95 | + [RAC_CONFIG1_REG] = 0x7c, |
|---|
| 79 | 96 | }; |
|---|
| 80 | 97 | |
|---|
| 81 | 98 | static const int b53_cpubiuctrl_regs[] = { /* Offset table chosen when the CPU node is compatible with "brcm,brahma-b53" */ |
|---|
| 82 | 99 | [CPU_CREDIT_REG] = 0x0b0, |
|---|
| 83 | 100 | [CPU_MCP_FLOW_REG] = 0x0b4, |
|---|
| 84 | 101 | [CPU_WRITEBACK_CTRL_REG] = 0x22c, |
|---|
| 102 | + [RAC_CONFIG0_REG] = 0x78, |
|---|
| 103 | + [RAC_CONFIG1_REG] = 0x7c, |
|---|
| 85 | 104 | }; |
|---|
| 86 | 105 | |
|---|
| 87 | | -#define NUM_CPU_BIUCTRL_REGS 3 |
|---|
| 106 | +static const int a72_cpubiuctrl_regs[] = { /* Offset table chosen when the CPU node is compatible with "arm,cortex-a72" */ |
|---|
| 107 | + [CPU_CREDIT_REG] = 0x18, |
|---|
| 108 | + [CPU_MCP_FLOW_REG] = 0x1c, |
|---|
| 109 | + [CPU_WRITEBACK_CTRL_REG] = 0x20, |
|---|
| 110 | + [RAC_CONFIG0_REG] = 0x08, /* the address of this table is also compared against cpubiuctrl_regs to detect the A72 case */ |
|---|
| 111 | + [RAC_CONFIG1_REG] = 0x0c, |
|---|
| 112 | +}; |
|---|
| 88 | 113 | |
|---|
| 89 | 114 | static int __init mcp_write_pairing_set(void) |
|---|
| 90 | 115 | { |
|---|
| .. | .. |
|---|
| 109 | 134 | return 0; |
|---|
| 110 | 135 | } |
|---|
| 111 | 136 | |
|---|
| 112 | | -static const u32 b53_mach_compat[] = { |
|---|
| 137 | +static const u32 a72_b53_mach_compat[] = { /* Family IDs matched against BRCM_ID(brcmstb_get_family_id()); mcp_a72_b53_set() returns early when none matches */ |
|---|
| 138 | + 0x7211, |
|---|
| 139 | + 0x7216, |
|---|
| 140 | + 0x72164, |
|---|
| 141 | + 0x72165, |
|---|
| 142 | + 0x7255, |
|---|
| 143 | + 0x7260, |
|---|
| 113 | 144 | 0x7268, |
|---|
| 114 | 145 | 0x7271, |
|---|
| 115 | 146 | 0x7278, |
|---|
| 116 | 147 | }; |
|---|
| 117 | 148 | |
|---|
| 118 | | -static void __init mcp_b53_set(void) |
|---|
| 149 | +/* The read-ahead cache present in the Brahma-B53 CPU is a special piece of |
|---|
| 150 | + * hardware after the integrated L2 cache of the B53 CPU complex whose purpose |
|---|
| 151 | + * is to prefetch instruction and/or data with a line size of either 64 bytes |
|---|
| 152 | + * or 256 bytes. The rationale is that the data-bus of the CPU interface is |
|---|
| 153 | + * optimized for 256-byte transactions, and enabling the read-ahead cache |
|---|
| 154 | + * provides a significant performance boost (typically twice the performance |
|---|
| 155 | + * for a memcpy benchmark application). |
|---|
| 156 | + * |
|---|
| 157 | + * The read-ahead cache is transparent for Virtual Address cache maintenance |
|---|
| 158 | + * operations: IC IVAU, DC IVAC, DC CVAC, DC CVAU and DC CIVAC. So no special |
|---|
| 159 | + * handling is needed for the DMA API above and beyond what is included in the |
|---|
| 160 | + * arm64 implementation. |
|---|
| 161 | + * |
|---|
| 162 | + * In addition, since the Point of Unification is typically between L1 and L2 |
|---|
| 163 | + * for the Brahma-B53 processor no special read-ahead cache handling is needed |
|---|
| 164 | + * for the IC IALLU and IC IALLUIS cache maintenance operations. |
|---|
| 165 | + * |
|---|
| 166 | + * However, it is not possible to specify the cache level (L3) for the cache |
|---|
| 167 | + * maintenance instructions operating by set/way to operate on the read-ahead |
|---|
| 168 | + * cache. The read-ahead cache will maintain coherency when inner cache lines |
|---|
| 169 | + * are cleaned by set/way, but if it is necessary to invalidate inner cache |
|---|
| 170 | + * lines by set/way to maintain coherency with system masters operating on |
|---|
| 171 | + * shared memory that does not have hardware support for coherency, then it |
|---|
| 172 | + * will also be necessary to explicitly invalidate the read-ahead cache. |
|---|
| 173 | + */ |
|---|
| 174 | +static void __init a72_b53_rac_enable_all(struct device_node *np) |
|---|
| 175 | +{ /* Builds the per-CPU enable word, writes it to RAC_CONFIG0_REG, updates RAC_CONFIG1_REG and logs np; np is only used for the log message */ |
|---|
| 176 | + unsigned int cpu; |
|---|
| 177 | + u32 enable = 0, pref_dist, shift; |
|---|
| 178 | + |
|---|
| 179 | + if (IS_ENABLED(CONFIG_CACHE_B15_RAC)) /* nothing is read, written or logged in that configuration */ |
|---|
| 180 | + return; |
|---|
| 181 | + |
|---|
| 182 | + if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n")) /* four groups of RAC_CPU_SHIFT bits fill the 32-bit enable word */ |
|---|
| 183 | + return; |
|---|
| 184 | + |
|---|
| 185 | + pref_dist = cbc_readl(RAC_CONFIG1_REG); /* start from the current value; bits are only ORed in below */ |
|---|
| 186 | + for_each_possible_cpu(cpu) { |
|---|
| 187 | + shift = cpu * RAC_CPU_SHIFT + RACPREFDATA_SHIFT; |
|---|
| 188 | + enable |= (u32)RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT); /* cast first: for CPU 3 the mask reaches bit 31, which overflows a signed int shift */ |
|---|
| 189 | + if (cpubiuctrl_regs == a72_cpubiuctrl_regs) { /* Cortex-A72 offset table is in use */ |
|---|
| 190 | + enable &= ~(RACENPREF_MASK << shift); /* clear this CPU's 2-bit field at RACPREFDATA_SHIFT ... */ |
|---|
| 191 | + enable |= 3 << shift; /* ... and set it to 3 */ |
|---|
| 192 | + pref_dist |= 1 << (cpu + DPREF_LINE_2_SHIFT); /* one flag bit per CPU in the RAC_CONFIG1_REG value */ |
|---|
| 193 | + } |
|---|
| 194 | + } |
|---|
| 195 | + |
|---|
| 196 | + cbc_writel(enable, RAC_CONFIG0_REG); |
|---|
| 197 | + cbc_writel(pref_dist, RAC_CONFIG1_REG); |
|---|
| 198 | + |
|---|
| 199 | + pr_info("%pOF: Broadcom %s read-ahead cache\n", |
|---|
| 200 | + np, cpubiuctrl_regs == a72_cpubiuctrl_regs ? |
|---|
| 201 | + "Cortex-A72" : "Brahma-B53"); /* NOTE(review): also printed when the active table has -1 for the RAC registers (b15 table) and nothing was written - confirm this is intended */ |
|---|
| 202 | +} |
|---|
| 203 | + |
|---|
| 204 | +static void __init mcp_a72_b53_set(void) |
|---|
| 119 | 205 | { |
|---|
| 120 | 206 | unsigned int i; |
|---|
| 121 | 207 | u32 reg; |
|---|
| 122 | 208 | |
|---|
| 123 | 209 | reg = brcmstb_get_family_id(); |
|---|
| 124 | 210 | |
|---|
| 125 | | - for (i = 0; i < ARRAY_SIZE(b53_mach_compat); i++) { |
|---|
| 126 | | - if (BRCM_ID(reg) == b53_mach_compat[i]) |
|---|
| 211 | + for (i = 0; i < ARRAY_SIZE(a72_b53_mach_compat); i++) { |
|---|
| 212 | + if (BRCM_ID(reg) == a72_b53_mach_compat[i]) |
|---|
| 127 | 213 | break; |
|---|
| 128 | 214 | } |
|---|
| 129 | 215 | |
|---|
| 130 | | - if (i == ARRAY_SIZE(b53_mach_compat)) |
|---|
| 216 | + if (i == ARRAY_SIZE(a72_b53_mach_compat)) |
|---|
| 131 | 217 | return; |
|---|
| 132 | 218 | |
|---|
| 133 | 219 | /* Set all 3 MCP interfaces to 8 credits */ |
|---|
| .. | .. |
|---|
| 165 | 251 | static int __init setup_hifcpubiuctrl_regs(struct device_node *np) |
|---|
| 166 | 252 | { |
|---|
| 167 | 253 | struct device_node *cpu_dn; |
|---|
| 254 | + u32 family_id; |
|---|
| 168 | 255 | int ret = 0; |
|---|
| 169 | 256 | |
|---|
| 170 | 257 | cpubiuctrl_base = of_iomap(np, 0); |
|---|
| .. | .. |
|---|
| 187 | 274 | cpubiuctrl_regs = b15_cpubiuctrl_regs; |
|---|
| 188 | 275 | else if (of_device_is_compatible(cpu_dn, "brcm,brahma-b53")) |
|---|
| 189 | 276 | cpubiuctrl_regs = b53_cpubiuctrl_regs; |
|---|
| 277 | + else if (of_device_is_compatible(cpu_dn, "arm,cortex-a72")) |
|---|
| 278 | + cpubiuctrl_regs = a72_cpubiuctrl_regs; |
|---|
| 190 | 279 | else { |
|---|
| 191 | 280 | pr_err("unsupported CPU\n"); |
|---|
| 192 | 281 | ret = -EINVAL; |
|---|
| 193 | 282 | } |
|---|
| 194 | 283 | of_node_put(cpu_dn); |
|---|
| 195 | 284 | |
|---|
| 196 | | - if (BRCM_ID(brcmstb_get_family_id()) == 0x7260) |
|---|
| 285 | + family_id = brcmstb_get_family_id(); |
|---|
| 286 | + if (BRCM_ID(family_id) == 0x7260 && BRCM_REV(family_id) == 0) |
|---|
| 197 | 287 | cpubiuctrl_regs = b53_cpubiuctrl_no_wb_regs; |
|---|
| 198 | 288 | out: |
|---|
| 199 | 289 | of_node_put(np); |
|---|
| .. | .. |
|---|
| 256 | 346 | return ret; |
|---|
| 257 | 347 | } |
|---|
| 258 | 348 | |
|---|
| 259 | | - mcp_b53_set(); |
|---|
| 349 | + a72_b53_rac_enable_all(np); |
|---|
| 350 | + mcp_a72_b53_set(); |
|---|
| 260 | 351 | #ifdef CONFIG_PM_SLEEP |
|---|
| 261 | 352 | register_syscore_ops(&brcmstb_cpu_credit_syscore_ops); |
|---|
| 262 | 353 | #endif |
|---|