From a5969cabbb4660eab42b6ef0412cbbd1200cf14d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Sat, 12 Oct 2024 07:10:09 +0000 Subject: [PATCH] 修改led为gpio --- kernel/drivers/net/ethernet/netronome/nfp/bpf/jit.c | 810 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 files changed, 654 insertions(+), 156 deletions(-) diff --git a/kernel/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/kernel/drivers/net/ethernet/netronome/nfp/bpf/jit.c index c3ce0fb..0a721f6 100644 --- a/kernel/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/kernel/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2016-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2016-2018 Netronome Systems, Inc. */ #define pr_fmt(fmt) "NFP net bpf: " fmt @@ -264,6 +234,38 @@ emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer) { emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL); +} + +static void +__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + u8 defer, bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_BR_ALU_BASE | + FIELD_PREP(OP_BR_ALU_A_SRC, areg) | + FIELD_PREP(OP_BR_ALU_B_SRC, breg) | + FIELD_PREP(OP_BR_ALU_DEFBR, defer) | + FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) | + FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn, + reg.src_lmextn); } static void @@ -621,6 +623,13 @@ } static void +wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst) +{ + if (meta->flags & FLAG_INSN_DO_ZEXT) + wrp_immed(nfp_prog, reg_both(dst + 1), 0); +} + +static void wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm, enum nfp_relo_type relo) { @@ -856,7 +865,8 @@ } static int -data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) +data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset, + u8 dst_gpr, int size) { unsigned int i; u16 shift, sz; @@ -879,14 +889,15 @@ wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } static int -data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, - swreg lreg, swreg rreg, int size, enum cmd_mode mode) +data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 dst_gpr, swreg lreg, swreg rreg, int size, + enum cmd_mode mode) { unsigned int i; u8 mask, sz; @@ -909,33 +920,34 @@ wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } static int -data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, - u8 dst_gpr, u8 size) +data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) { - return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset, - size, CMD_MODE_32b); + return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr), + offset, size, CMD_MODE_32b); } static int -data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, - u8 dst_gpr, u8 size) +data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) { swreg rega, regb; addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b); - return data_ld_host_order(nfp_prog, dst_gpr, rega, regb, + return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb, size, CMD_MODE_40b_BA); } static int -construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) +construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u16 src, u8 size) { swreg tmp_reg; @@ -951,10 +963,12 @@ emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ - return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); + return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size); } -static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +static int +construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u8 size) { swreg tmp_reg; @@ -965,7 +979,7 @@ /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); - return data_ld(nfp_prog, tmp_reg, 0, size); + return data_ld(nfp_prog, meta, tmp_reg, 0, size); } static int @@ -1148,8 +1162,8 @@ unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, bool clr_gpr, lmem_step step) { - s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; - bool first = true, last; + s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off; + bool first = true, narrow_ld, last; bool needs_inc = false; swreg stack_off_reg; u8 prev_gpr = 255; @@ -1157,7 +1171,8 @@ bool lm3 = true; int ret; - if (meta->ptr_not_const) { + if (meta->ptr_not_const || + meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) { /* Use of the last encountered ptr_off is OK, they all have * the same alignment. Depend on low bits of value being * discarded when written to LMaddr register. @@ -1194,14 +1209,23 @@ needs_inc = true; } + + narrow_ld = clr_gpr && size < 8; + if (lm3) { + unsigned int nop_cnt; + emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); - /* For size < 4 one slot will be filled by zeroing of upper. */ - wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3); + /* For size < 4 one slot will be filled by zeroing of upper, + * but be careful, that zeroing could be eliminated by zext + * optimization. + */ + nop_cnt = narrow_ld && meta->flags & FLAG_INSN_DO_ZEXT ? 2 : 3; + wrp_nops(nfp_prog, nop_cnt); } - if (clr_gpr && size < 8) - wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + if (narrow_ld) + wrp_zext(nfp_prog, meta, gpr); while (size) { u32 slice_end; @@ -1274,7 +1298,7 @@ u64 imm = insn->imm; /* sign extend */ if (skip) { - meta->skip = true; + meta->flags |= FLAG_INSN_SKIP_NOOP; return 0; } @@ -1302,9 +1326,10 @@ enum alu_op alu_op) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -1316,7 +1341,7 @@ u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -1337,8 +1362,9 @@ wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, insn->src_reg * 2, br_mask, insn->off); - wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, - insn->src_reg * 2 + 1, br_mask, insn->off); + if (is_mbpf_jmp64(meta)) + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); return 0; } @@ -1393,13 +1419,15 @@ else emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg)); - tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); - if (!code->swap) - emit_alu(nfp_prog, reg_none(), - reg_a(reg + 1), carry_op, tmp_reg); - else - emit_alu(nfp_prog, reg_none(), - tmp_reg, carry_op, reg_a(reg + 1)); + if (is_mbpf_jmp64(meta)) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!code->swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), carry_op, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, carry_op, reg_a(reg + 1)); + } emit_br(nfp_prog, code->br_mask, insn->off, 0); @@ -1426,8 +1454,9 @@ } emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); - emit_alu(nfp_prog, reg_none(), - reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + if (is_mbpf_jmp64(meta)) + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); emit_br(nfp_prog, code->br_mask, insn->off, 0); return 0; @@ -1701,7 +1730,7 @@ s64 lm_off; /* We only have to reload LM0 if the key is not at start of stack */ - lm_off = nfp_prog->stack_depth; + lm_off = nfp_prog->stack_frame_depth; lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; load_lm_ptr = meta->arg2.var_off || lm_off; @@ -1814,10 +1843,10 @@ swreg stack_depth_reg; stack_depth_reg = ur_load_imm_any(nfp_prog, - nfp_prog->stack_depth, + nfp_prog->stack_frame_depth, stack_imm(nfp_prog)); - emit_alu(nfp_prog, reg_both(dst), - stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); + emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog), + ALU_OP_ADD, stack_depth_reg); wrp_immed(nfp_prog, reg_both(dst + 1), 0); } else { wrp_reg_mov(nfp_prog, dst, src); @@ -1966,6 +1995,9 @@ */ static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, @@ -2078,6 +2110,9 @@ */ static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, shift_amt); @@ -2179,6 +2214,9 @@ */ static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, shift_amt); @@ -2380,23 +2418,132 @@ u8 dst = meta->insn.dst_reg * 2; emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + wrp_zext(nfp_prog, meta, dst); + return 0; +} + +static int +__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) +{ + if (shift_amt) { + /* Set signedness bit (MSB of result). */ + emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + } + wrp_zext(nfp_prog, meta, dst); + + return 0; +} + +static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __ashr_imm(nfp_prog, meta, dst, umin); + + src = insn->src_reg * 2; + /* NOTE: the first insn will set both indirect shift amount (source A) + * and signedness bit (MSB of result). + */ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst), SHF_SC_R_SHF); + wrp_zext(nfp_prog, meta, dst); + + return 0; +} + +static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __ashr_imm(nfp_prog, meta, dst, insn->imm); +} + +static int +__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + wrp_zext(nfp_prog, meta, dst); + return 0; +} + +static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shr_imm(nfp_prog, meta, dst, insn->imm); +} + +static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shr_imm(nfp_prog, meta, dst, umin); + + src = insn->src_reg * 2; + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF); + wrp_zext(nfp_prog, meta, dst); + return 0; +} + +static int +__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt); + wrp_zext(nfp_prog, meta, dst); return 0; } static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - if (!insn->imm) - return 1; /* TODO: zero shift means indirect */ + return __shl_imm(nfp_prog, meta, dst, insn->imm); +} - emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), - reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), - SHF_SC_L_SHF, insn->imm); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); +static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shl_imm(nfp_prog, meta, dst, umin); + + src = insn->src_reg * 2; + shl_reg64_lt32_low(nfp_prog, dst, src); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2458,34 +2605,34 @@ static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 1); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1); } static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 2); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2); } static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 4); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4); } static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 1); } static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 2); } static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 4); } @@ -2505,17 +2652,17 @@ switch (meta->insn.off) { case offsetof(struct __sk_buff, len): - if (size != FIELD_SIZEOF(struct __sk_buff, len)) + if (size != sizeof_field(struct __sk_buff, len)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); break; case offsetof(struct __sk_buff, data): - if (size != FIELD_SIZEOF(struct __sk_buff, data)) + if (size != sizeof_field(struct __sk_buff, data)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; case offsetof(struct __sk_buff, data_end): - if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) + if (size != sizeof_field(struct __sk_buff, data_end)) return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); @@ -2536,12 +2683,12 @@ switch (meta->insn.off) { case offsetof(struct xdp_md, data): - if (size != FIELD_SIZEOF(struct xdp_md, data)) + if (size != sizeof_field(struct xdp_md, data)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; case offsetof(struct xdp_md, data_end): - if (size != FIELD_SIZEOF(struct xdp_md, data_end)) + if (size != sizeof_field(struct xdp_md, data_end)) return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); @@ -2563,7 +2710,7 @@ tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); - return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2, + return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2, tmp_reg, meta->insn.dst_reg * 2, size); } @@ -2575,7 +2722,7 @@ tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); - return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2, + return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2, tmp_reg, meta->insn.dst_reg * 2, size); } @@ -2636,7 +2783,7 @@ wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); if (!len_mid) { - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } @@ -2644,7 +2791,7 @@ if (size <= REG_WIDTH) { wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else { swreg src_hi = reg_xfer(idx + 2); @@ -2675,10 +2822,10 @@ if (size < REG_WIDTH) { wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else if (size == REG_WIDTH) { wrp_mov(nfp_prog, dst_lo, src_lo); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else { swreg src_hi = reg_xfer(idx + 1); @@ -3008,30 +3155,37 @@ return 0; } +static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + swreg tmp_reg; + + tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ + u8 dst_gpr = insn->dst_reg * 2; swreg tmp_reg; - if (!imm) { - meta->skip = true; - return 0; - } - - if (imm & ~0U) { - tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(dst_gpr), ALU_OP_AND, tmp_reg); + /* Upper word of the mask can only be 0 or ~0 from sign extension, + * so either ignore it or OR the whole thing in. + */ + if (is_mbpf_jmp64(meta) && imm >> 32) { emit_alu(nfp_prog, reg_none(), - reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); - emit_br(nfp_prog, BR_BNE, insn->off, 0); + reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog)); } - - if (imm >> 32) { - tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); - emit_alu(nfp_prog, reg_none(), - reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); - emit_br(nfp_prog, BR_BNE, insn->off, 0); - } + emit_br(nfp_prog, BR_BNE, insn->off, 0); return 0; } @@ -3040,11 +3194,16 @@ { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ + bool is_jmp32 = is_mbpf_jmp32(meta); swreg tmp_reg; if (!imm) { - emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), - ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + if (is_jmp32) + emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE, + reg_b(insn->dst_reg * 2)); + else + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); emit_br(nfp_prog, BR_BNE, insn->off, 0); return 0; } @@ -3053,6 +3212,9 @@ emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); emit_br(nfp_prog, BR_BNE, insn->off, 0); + + if (is_jmp32) + return 0; tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); emit_alu(nfp_prog, reg_none(), @@ -3068,10 +3230,13 @@ emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), ALU_OP_XOR, reg_b(insn->src_reg * 2)); - emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), - ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); - emit_alu(nfp_prog, reg_none(), - imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + if (is_mbpf_jmp64(meta)) { + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, + reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, + imm_b(nfp_prog)); + } emit_br(nfp_prog, BR_BEQ, insn->off, 0); return 0; @@ -3087,7 +3252,93 @@ return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); } -static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int +bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_tgt, stack_depth, offset_br; + swreg tmp_reg; + + stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN); + /* Space for saving the return address is accounted for by the callee, + * so stack_depth can be zero for the main function. + */ + if (stack_depth) { + tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, stack_reg(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg); + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), + NFP_CSR_ACT_LM_ADDR0); + } + + /* Two cases for jumping to the callee: + * + * - If callee uses and needs to save R6~R9 then: + * 1. Put the start offset of the callee into imm_b(). This will + * require a fixup step, as we do not necessarily know this + * address yet. + * 2. Put the return address from the callee to the caller into + * register ret_reg(). + * 3. (After defer slots are consumed) Jump to the subroutine that + * pushes the registers to the stack. + * The subroutine acts as a trampoline, and returns to the address in + * imm_b(), i.e. jumps to the callee. + * + * - If callee does not need to save R6~R9 then just load return + * address to the caller in ret_reg(), and jump to the callee + * directly. + * + * Using ret_reg() to pass the return address to the callee is set here + * as a convention. The callee can then push this address onto its + * stack frame in its prologue. The advantages of passing the return + * address through ret_reg(), instead of pushing it to the stack right + * here, are the following: + * - It looks cleaner. + * - If the called function is called multiple time, we get a lower + * program size. + * - We save two no-op instructions that should be added just before + * the emit_br() when stack depth is not null otherwise. + * - If we ever find a register to hold the return address during whole + * execution of the callee, we will not have to push the return + * address to the stack for leaf functions. + */ + if (!meta->jmp_dst) { + pr_err("BUG: BPF-to-BPF call has no destination recorded\n"); + return -ELOOP; + } + if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) { + ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, + RELO_BR_GO_CALL_PUSH_REGS); + offset_br = nfp_prog_current_offset(nfp_prog); + wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); + } else { + ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; + emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1); + offset_br = nfp_prog_current_offset(nfp_prog); + } + wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) + return -EINVAL; + + if (stack_depth) { + tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, stack_reg(nfp_prog), + stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg); + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), + NFP_CSR_ACT_LM_ADDR0); + wrp_nops(nfp_prog, 3); + } + + meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog); + meta->num_insns_after_br -= offset_br; + + return 0; +} + +static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { switch (meta->insn.imm) { case BPF_FUNC_xdp_adjust_head: @@ -3108,11 +3359,57 @@ } } +static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (is_mbpf_pseudo_call(meta)) + return bpf_to_bpf_call(nfp_prog, meta); + else + return helper_call(nfp_prog, meta); +} + +static bool nfp_is_main_function(struct nfp_insn_meta *meta) +{ + return meta->subprog_idx == 0; +} + static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); return 0; +} + +static int +nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) { + /* Pop R6~R9 to the stack via related subroutine. + * We loaded the return address to the caller into ret_reg(). + * This means that the subroutine does not come back here, we + * make it jump back to the subprogram caller directly! + */ + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, + RELO_BR_GO_CALL_POP_REGS); + /* Pop return address from the stack. */ + wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); + } else { + /* Pop return address from the stack. */ + wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); + /* Jump back to caller if no callee-saved registers were used + * by the subprogram. + */ + emit_rtn(nfp_prog, ret_reg(nfp_prog), 0); + } + + return 0; +} + +static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_is_main_function(meta)) + return goto_out(nfp_prog, meta); + else + return nfp_subprog_epilogue(nfp_prog, meta); } static const instr_cb_t instr_cb[256] = { @@ -3156,7 +3453,12 @@ [BPF_ALU | BPF_DIV | BPF_X] = div_reg, [BPF_ALU | BPF_DIV | BPF_K] = div_imm, [BPF_ALU | BPF_NEG] = neg_reg, + [BPF_ALU | BPF_LSH | BPF_X] = shl_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_ALU | BPF_RSH | BPF_X] = shr_reg, + [BPF_ALU | BPF_RSH | BPF_K] = shr_imm, + [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg, + [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm, [BPF_ALU | BPF_END | BPF_X] = end_reg32, [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, [BPF_LD | BPF_ABS | BPF_B] = data_ld1, @@ -3202,22 +3504,62 @@ [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg, [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm, + [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm, + [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg, + [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg, [BPF_JMP | BPF_CALL] = call, - [BPF_JMP | BPF_EXIT] = goto_out, + [BPF_JMP | BPF_EXIT] = jmp_exit, }; /* --- Assembler logic --- */ +static int +nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct nfp_insn_meta *jmp_dst, u32 br_idx) +{ + if (immed_get_value(nfp_prog->prog[br_idx + 1])) { + pr_err("BUG: failed to fix up callee register saving\n"); + return -EINVAL; + } + + immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off); + + return 0; +} + static int nfp_fixup_branches(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta, *jmp_dst; u32 idx, br_idx; + int err; list_for_each_entry(meta, &nfp_prog->insns, l) { - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; - if (meta->insn.code == (BPF_JMP | BPF_CALL)) + if (!is_mbpf_jmp(meta)) continue; - if (BPF_CLASS(meta->insn.code) != BPF_JMP) + if (meta->insn.code == (BPF_JMP | BPF_EXIT) && + !nfp_is_main_function(meta)) + continue; + if (is_mbpf_helper_call(meta)) continue; if (list_is_last(&meta->l, &nfp_prog->insns)) @@ -3225,14 +3567,26 @@ else br_idx = list_next_entry(meta, l)->off - 1; + /* For BPF-to-BPF function call, a stack adjustment sequence is + * generated after the return instruction. Therefore, we must + * withdraw the length of this sequence to have br_idx pointing + * to where the "branch" NFP instruction is expected to be. + */ + if (is_mbpf_pseudo_call(meta)) + br_idx -= meta->num_insns_after_br; + if (!nfp_is_br(nfp_prog->prog[br_idx])) { pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", br_idx, meta->insn.code, nfp_prog->prog[br_idx]); return -ELOOP; } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + continue; + /* Leave special branches for later */ if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != - RELO_BR_REL) + RELO_BR_REL && !is_mbpf_pseudo_call(meta)) continue; if (!meta->jmp_dst) { @@ -3242,10 +3596,22 @@ jmp_dst = meta->jmp_dst; - if (jmp_dst->skip) { + if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) { pr_err("Branch landing on removed instruction!!\n"); return -ELOOP; } + + if (is_mbpf_pseudo_call(meta) && + nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) { + err = nfp_fixup_immed_relo(nfp_prog, meta, + jmp_dst, br_idx); + if (err) + return err; + } + + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL) + continue; for (idx = meta->off; idx <= br_idx; idx++) { if (!nfp_is_br(nfp_prog->prog[idx])) @@ -3262,6 +3628,27 @@ wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); emit_alu(nfp_prog, plen_reg(nfp_prog), plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); +} + +static void +nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + /* Save return address into the stack. */ + wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog)); +} + +static void +nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth; + + nfp_prog->stack_frame_depth = round_up(depth, 4); + nfp_subprog_prologue(nfp_prog, meta); +} + +bool nfp_is_subprog_start(struct nfp_insn_meta *meta) +{ + return meta->flags & FLAG_INSN_IS_SUBPROG_START; } static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) @@ -3354,6 +3741,67 @@ emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); } +static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog) +{ + unsigned int idx; + + for (idx = 1; idx < nfp_prog->subprog_cnt; idx++) + if (nfp_prog->subprog[idx].needs_reg_push) + return true; + + return false; +} + +static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) +{ + u8 reg; + + /* Subroutine: Save all callee saved registers (R6 ~ R9). + * imm_b() holds the return address. + */ + nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog); + for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { + u8 adj = (reg - BPF_REG_0) * 2; + u8 idx = (reg - BPF_REG_6) * 2; + + /* The first slot in the stack frame is used to push the return + * address in bpf_to_bpf_call(), start just after. + */ + wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj)); + + if (reg == BPF_REG_8) + /* Prepare to jump back, last 3 insns use defer slots */ + emit_rtn(nfp_prog, imm_b(nfp_prog), 3); + + wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1)); + } +} + +static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog) +{ + u8 reg; + + /* Subroutine: Restore all callee saved registers (R6 ~ R9). + * ret_reg() holds the return address. + */ + nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog); + for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { + u8 adj = (reg - BPF_REG_0) * 2; + u8 idx = (reg - BPF_REG_6) * 2; + + /* The first slot in the stack frame holds the return address, + * start popping just after that. + */ + wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx)); + + if (reg == BPF_REG_8) + /* Prepare to jump back, last 3 insns use defer slots */ + emit_rtn(nfp_prog, ret_reg(nfp_prog), 3); + + wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1)); + } +} + static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->type) { @@ -3366,12 +3814,22 @@ default: WARN_ON(1); } + + if (!nfp_prog_needs_callee_reg_save(nfp_prog)) + return; + + nfp_push_callee_registers(nfp_prog); + nfp_pop_callee_registers(nfp_prog); } static int nfp_translate(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta; + unsigned int depth; int err; + + depth = nfp_prog->subprog[0].stack_depth; + nfp_prog->stack_frame_depth = round_up(depth, 4); nfp_intro(nfp_prog); if (nfp_prog->error) @@ -3382,7 +3840,13 @@ meta->off = nfp_prog_current_offset(nfp_prog); - if (meta->skip) { + if (nfp_is_subprog_start(meta)) { + nfp_start_subprog(nfp_prog, meta); + if (nfp_prog->error) + return nfp_prog->error; + } + + if (meta->flags & FLAG_INSN_SKIP_MASK) { nfp_prog->n_translated++; continue; } @@ -3430,10 +3894,10 @@ /* Programs start with R6 = R1 but we ignore the skb pointer */ if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn.src_reg == 1 && insn.dst_reg == 6) - meta->skip = true; + meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; /* Return as soon as something doesn't match */ - if (!meta->skip) + if (!(meta->flags & FLAG_INSN_SKIP_MASK)) return; } } @@ -3448,19 +3912,17 @@ list_for_each_entry(meta, &nfp_prog->insns, l) { struct bpf_insn insn = meta->insn; - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; - if (BPF_CLASS(insn.code) != BPF_ALU && - BPF_CLASS(insn.code) != BPF_ALU64 && - BPF_CLASS(insn.code) != BPF_JMP) + if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta)) continue; if (BPF_SRC(insn.code) != BPF_K) continue; if (insn.imm >= 0) continue; - if (BPF_CLASS(insn.code) == BPF_JMP) { + if (is_mbpf_jmp(meta)) { switch (BPF_OP(insn.code)) { case BPF_JGE: case BPF_JSGE: @@ -3490,7 +3952,7 @@ static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta1, *meta2; - const s32 exp_mask[] = { + static const s32 exp_mask[] = { [BPF_B] = 0x000000ffU, [BPF_H] = 0x0000ffffU, [BPF_W] = 0xffffffffU, @@ -3522,7 +3984,7 @@ if (meta2->flags & FLAG_INSN_IS_JUMP_DST) continue; - meta2->skip = true; + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; } } @@ -3562,8 +4024,8 @@ meta3->flags & FLAG_INSN_IS_JUMP_DST) continue; - meta2->skip = true; - meta3->skip = true; + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; } } @@ -3758,7 +4220,8 @@ } head_ld_meta->paired_st = &head_st_meta->insn; - head_st_meta->skip = true; + head_st_meta->flags |= + FLAG_INSN_SKIP_PREC_DEPENDENT; } else { head_ld_meta->ldst_gather_len = 0; } @@ -3791,8 +4254,8 @@ head_ld_meta = meta1; head_st_meta = meta2; } else { - meta1->skip = true; - meta2->skip = true; + meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; } head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); @@ -3817,7 +4280,7 @@ if (meta->flags & FLAG_INSN_IS_JUMP_DST) cache_avail = false; - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; insn = &meta->insn; @@ -3903,7 +4366,7 @@ } list_for_each_entry(meta, &nfp_prog->insns, l) { - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) { @@ -3939,7 +4402,8 @@ u32 id; nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { - if (meta1->skip || meta2->skip) + if (meta1->flags & FLAG_INSN_SKIP_MASK || + meta2->flags & FLAG_INSN_SKIP_MASK) continue; if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || @@ -4018,26 +4482,41 @@ return ret; } -void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta; /* Another pass to record jump information. */ list_for_each_entry(meta, &nfp_prog->insns, l) { + struct nfp_insn_meta *dst_meta; u64 code = meta->insn.code; + unsigned int dst_idx; + bool pseudo_call; - if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && - BPF_OP(code) != BPF_CALL) { - struct nfp_insn_meta *dst_meta; - unsigned short dst_indx; + if (!is_mbpf_jmp(meta)) + continue; + if (BPF_OP(code) == BPF_EXIT) + continue; + if (is_mbpf_helper_call(meta)) + continue; - dst_indx = meta->n + 1 + meta->insn.off; - dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, - cnt); + /* If opcode is BPF_CALL at this point, this can only be a + * BPF-to-BPF call (a.k.a pseudo call). + */ + pseudo_call = BPF_OP(code) == BPF_CALL; - meta->jmp_dst = dst_meta; - dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; - } + if (pseudo_call) + dst_idx = meta->n + 1 + meta->insn.imm; + else + dst_idx = meta->n + 1 + meta->insn.off; + + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx); + + if (pseudo_call) + dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START; + + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; + meta->jmp_dst = dst_meta; } } @@ -4060,6 +4539,7 @@ for (i = 0; i < nfp_prog->prog_len; i++) { enum nfp_relo_type special; u32 val; + u16 off; special = FIELD_GET(OP_RELO_TYPE, prog[i]); switch (special) { @@ -4076,6 +4556,24 @@ br_set_offset(&prog[i], nfp_prog->tgt_abort + bv->start_off); break; + case RELO_BR_GO_CALL_PUSH_REGS: + if (!nfp_prog->tgt_call_push_regs) { + pr_err("BUG: failed to detect subprogram registers needs\n"); + err = -EINVAL; + goto err_free_prog; + } + off = nfp_prog->tgt_call_push_regs + bv->start_off; + br_set_offset(&prog[i], off); + break; + case RELO_BR_GO_CALL_POP_REGS: + if (!nfp_prog->tgt_call_pop_regs) { + pr_err("BUG: failed to detect subprogram registers needs\n"); + err = -EINVAL; + goto err_free_prog; + } + off = nfp_prog->tgt_call_pop_regs + bv->start_off; + br_set_offset(&prog[i], off); + break; case RELO_BR_NEXT_PKT: br_set_offset(&prog[i], bv->tgt_done); break; -- Gitblit v1.6.2