From bedbef8ad3e75a304af6361af235302bcc61d06b Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 14 May 2024 06:39:01 +0000 Subject: [PATCH] 修改内核路径 --- kernel/drivers/soc/bcm/brcmstb/biuctrl.c | 143 +++++++++++++++++++++++++++++++++++++++-------- 1 files changed, 117 insertions(+), 26 deletions(-) diff --git a/kernel/drivers/soc/bcm/brcmstb/biuctrl.c b/kernel/drivers/soc/bcm/brcmstb/biuctrl.c index 20b63be..7f8dc30 100644 --- a/kernel/drivers/soc/bcm/brcmstb/biuctrl.c +++ b/kernel/drivers/soc/bcm/brcmstb/biuctrl.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Broadcom STB SoCs Bus Unit Interface controls * * Copyright (C) 2015, Broadcom Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #define pr_fmt(fmt) "brcmstb: " KBUILD_MODNAME ": " fmt @@ -20,6 +12,22 @@ #include <linux/of_address.h> #include <linux/syscore_ops.h> #include <linux/soc/brcmstb/brcmstb.h> + +#define RACENPREF_MASK 0x3 +#define RACPREFINST_SHIFT 0 +#define RACENINST_SHIFT 2 +#define RACPREFDATA_SHIFT 4 +#define RACENDATA_SHIFT 6 +#define RAC_CPU_SHIFT 8 +#define RACCFG_MASK 0xff +#define DPREF_LINE_2_SHIFT 24 +#define DPREF_LINE_2_MASK 0xff + +/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */ +#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \ + RACENPREF_MASK << RACENINST_SHIFT | \ + 1 << RACPREFDATA_SHIFT | \ + RACENPREF_MASK << RACENDATA_SHIFT) #define CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK 0x70000000 #define CPU_CREDIT_REG_MCPx_READ_CRED_MASK 0xf @@ -39,11 +47,21 @@ static bool mcp_wr_pairing_en; static const int *cpubiuctrl_regs; +enum cpubiuctrl_regs { + CPU_CREDIT_REG = 0, + CPU_MCP_FLOW_REG, + CPU_WRITEBACK_CTRL_REG, + RAC_CONFIG0_REG, + RAC_CONFIG1_REG, + NUM_CPU_BIUCTRL_REGS, +}; + static inline u32 cbc_readl(int reg) { int offset = cpubiuctrl_regs[reg]; - if (offset == -1) + if (offset == -1 || + (IS_ENABLED(CONFIG_CACHE_B15_RAC) && reg >= RAC_CONFIG0_REG)) return (u32)-1; return readl_relaxed(cpubiuctrl_base + offset); @@ -53,38 +71,45 @@ { int offset = cpubiuctrl_regs[reg]; - if (offset == -1) + if (offset == -1 || + (IS_ENABLED(CONFIG_CACHE_B15_RAC) && reg >= RAC_CONFIG0_REG)) return; writel(val, cpubiuctrl_base + offset); } -enum cpubiuctrl_regs { - CPU_CREDIT_REG = 0, - CPU_MCP_FLOW_REG, - CPU_WRITEBACK_CTRL_REG -}; - static const int b15_cpubiuctrl_regs[] = { [CPU_CREDIT_REG] = 0x184, [CPU_MCP_FLOW_REG] = -1, [CPU_WRITEBACK_CTRL_REG] = -1, + [RAC_CONFIG0_REG] = -1, + [RAC_CONFIG1_REG] = -1, }; -/* Odd cases, e.g: 7260 */ +/* Odd cases, e.g: 7260A0 */ static const int b53_cpubiuctrl_no_wb_regs[] = { [CPU_CREDIT_REG] = 0x0b0, [CPU_MCP_FLOW_REG] = 0x0b4, [CPU_WRITEBACK_CTRL_REG] = -1, + [RAC_CONFIG0_REG] = 0x78, + [RAC_CONFIG1_REG] = 0x7c, }; static const int b53_cpubiuctrl_regs[] = { [CPU_CREDIT_REG] = 0x0b0, [CPU_MCP_FLOW_REG] = 0x0b4, [CPU_WRITEBACK_CTRL_REG] = 0x22c, + [RAC_CONFIG0_REG] = 0x78, + [RAC_CONFIG1_REG] = 0x7c, }; -#define NUM_CPU_BIUCTRL_REGS 3 +static const int a72_cpubiuctrl_regs[] = { + [CPU_CREDIT_REG] = 0x18, + [CPU_MCP_FLOW_REG] = 0x1c, + [CPU_WRITEBACK_CTRL_REG] = 0x20, + [RAC_CONFIG0_REG] = 0x08, + [RAC_CONFIG1_REG] = 0x0c, +}; static int __init mcp_write_pairing_set(void) { @@ -109,25 +134,86 @@ return 0; } -static const u32 b53_mach_compat[] = { +static const u32 a72_b53_mach_compat[] = { + 0x7211, + 0x7216, + 0x72164, + 0x72165, + 0x7255, + 0x7260, 0x7268, 0x7271, 0x7278, }; -static void __init mcp_b53_set(void) +/* The read-ahead cache present in the Brahma-B53 CPU is a special piece of + * hardware after the integrated L2 cache of the B53 CPU complex whose purpose + * is to prefetch instruction and/or data with a line size of either 64 bytes + * or 256 bytes. The rationale is that the data-bus of the CPU interface is + * optimized for 256-byte transactions, and enabling the read-ahead cache + * provides a significant performance boost (typically twice the performance + * for a memcpy benchmark application). + * + * The read-ahead cache is transparent for Virtual Address cache maintenance + * operations: IC IVAU, DC IVAC, DC CVAC, DC CVAU and DC CIVAC. So no special + * handling is needed for the DMA API above and beyond what is included in the + * arm64 implementation. + * + * In addition, since the Point of Unification is typically between L1 and L2 + * for the Brahma-B53 processor no special read-ahead cache handling is needed + * for the IC IALLU and IC IALLUIS cache maintenance operations. + * + * However, it is not possible to specify the cache level (L3) for the cache + * maintenance instructions operating by set/way to operate on the read-ahead + * cache. The read-ahead cache will maintain coherency when inner cache lines + * are cleaned by set/way, but if it is necessary to invalidate inner cache + * lines by set/way to maintain coherency with system masters operating on + * shared memory that does not have hardware support for coherency, then it + * will also be necessary to explicitly invalidate the read-ahead cache. + */ +static void __init a72_b53_rac_enable_all(struct device_node *np) +{ + unsigned int cpu; + u32 enable = 0, pref_dist, shift; + + if (IS_ENABLED(CONFIG_CACHE_B15_RAC)) + return; + + if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n")) + return; + + pref_dist = cbc_readl(RAC_CONFIG1_REG); + for_each_possible_cpu(cpu) { + shift = cpu * RAC_CPU_SHIFT + RACPREFDATA_SHIFT; + enable |= RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT); + if (cpubiuctrl_regs == a72_cpubiuctrl_regs) { + enable &= ~(RACENPREF_MASK << shift); + enable |= 3 << shift; + pref_dist |= 1 << (cpu + DPREF_LINE_2_SHIFT); + } + } + + cbc_writel(enable, RAC_CONFIG0_REG); + cbc_writel(pref_dist, RAC_CONFIG1_REG); + + pr_info("%pOF: Broadcom %s read-ahead cache\n", + np, cpubiuctrl_regs == a72_cpubiuctrl_regs ? + "Cortex-A72" : "Brahma-B53"); +} + +static void __init mcp_a72_b53_set(void) { unsigned int i; u32 reg; reg = brcmstb_get_family_id(); - for (i = 0; i < ARRAY_SIZE(b53_mach_compat); i++) { - if (BRCM_ID(reg) == b53_mach_compat[i]) + for (i = 0; i < ARRAY_SIZE(a72_b53_mach_compat); i++) { + if (BRCM_ID(reg) == a72_b53_mach_compat[i]) break; } - if (i == ARRAY_SIZE(b53_mach_compat)) + if (i == ARRAY_SIZE(a72_b53_mach_compat)) return; /* Set all 3 MCP interfaces to 8 credits */ @@ -165,6 +251,7 @@ static int __init setup_hifcpubiuctrl_regs(struct device_node *np) { struct device_node *cpu_dn; + u32 family_id; int ret = 0; cpubiuctrl_base = of_iomap(np, 0); @@ -187,13 +274,16 @@ cpubiuctrl_regs = b15_cpubiuctrl_regs; else if (of_device_is_compatible(cpu_dn, "brcm,brahma-b53")) cpubiuctrl_regs = b53_cpubiuctrl_regs; + else if (of_device_is_compatible(cpu_dn, "arm,cortex-a72")) + cpubiuctrl_regs = a72_cpubiuctrl_regs; else { pr_err("unsupported CPU\n"); ret = -EINVAL; } of_node_put(cpu_dn); - if (BRCM_ID(brcmstb_get_family_id()) == 0x7260) + family_id = brcmstb_get_family_id(); + if (BRCM_ID(family_id) == 0x7260 && BRCM_REV(family_id) == 0) cpubiuctrl_regs = b53_cpubiuctrl_no_wb_regs; out: of_node_put(np); @@ -256,7 +346,8 @@ return ret; } - mcp_b53_set(); + a72_b53_rac_enable_all(np); + mcp_a72_b53_set(); #ifdef CONFIG_PM_SLEEP register_syscore_ops(&brcmstb_cpu_credit_syscore_ops); #endif -- Gitblit v1.6.2