hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/net/ethernet/netronome/nfp/bpf/jit.c
....@@ -1,35 +1,5 @@
1
-/*
2
- * Copyright (C) 2016-2018 Netronome Systems, Inc.
3
- *
4
- * This software is dual licensed under the GNU General License Version 2,
5
- * June 1991 as shown in the file COPYING in the top-level directory of this
6
- * source tree or the BSD 2-Clause License provided below. You have the
7
- * option to license this software under the complete terms of either license.
8
- *
9
- * The BSD 2-Clause License:
10
- *
11
- * Redistribution and use in source and binary forms, with or
12
- * without modification, are permitted provided that the following
13
- * conditions are met:
14
- *
15
- * 1. Redistributions of source code must retain the above
16
- * copyright notice, this list of conditions and the following
17
- * disclaimer.
18
- *
19
- * 2. Redistributions in binary form must reproduce the above
20
- * copyright notice, this list of conditions and the following
21
- * disclaimer in the documentation and/or other materials
22
- * provided with the distribution.
23
- *
24
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31
- * SOFTWARE.
32
- */
1
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2
+/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
333
344 #define pr_fmt(fmt) "NFP net bpf: " fmt
355
....@@ -264,6 +234,38 @@
264234 emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
265235 {
266236 emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
237
+}
238
+
239
+static void
240
+__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
241
+ u8 defer, bool dst_lmextn, bool src_lmextn)
242
+{
243
+ u64 insn;
244
+
245
+ insn = OP_BR_ALU_BASE |
246
+ FIELD_PREP(OP_BR_ALU_A_SRC, areg) |
247
+ FIELD_PREP(OP_BR_ALU_B_SRC, breg) |
248
+ FIELD_PREP(OP_BR_ALU_DEFBR, defer) |
249
+ FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) |
250
+ FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) |
251
+ FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn);
252
+
253
+ nfp_prog_push(nfp_prog, insn);
254
+}
255
+
256
+static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer)
257
+{
258
+ struct nfp_insn_ur_regs reg;
259
+ int err;
260
+
261
+ err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), &reg);
262
+ if (err) {
263
+ nfp_prog->error = err;
264
+ return;
265
+ }
266
+
267
+ __emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn,
268
+ reg.src_lmextn);
267269 }
268270
269271 static void
....@@ -621,6 +623,13 @@
621623 }
622624
623625 static void
626
+wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst)
627
+{
628
+ if (meta->flags & FLAG_INSN_DO_ZEXT)
629
+ wrp_immed(nfp_prog, reg_both(dst + 1), 0);
630
+}
631
+
632
+static void
624633 wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
625634 enum nfp_relo_type relo)
626635 {
....@@ -856,7 +865,8 @@
856865 }
857866
858867 static int
859
-data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
868
+data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset,
869
+ u8 dst_gpr, int size)
860870 {
861871 unsigned int i;
862872 u16 shift, sz;
....@@ -879,14 +889,15 @@
879889 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
880890
881891 if (i < 2)
882
- wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
892
+ wrp_zext(nfp_prog, meta, dst_gpr);
883893
884894 return 0;
885895 }
886896
887897 static int
888
-data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
889
- swreg lreg, swreg rreg, int size, enum cmd_mode mode)
898
+data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
899
+ u8 dst_gpr, swreg lreg, swreg rreg, int size,
900
+ enum cmd_mode mode)
890901 {
891902 unsigned int i;
892903 u8 mask, sz;
....@@ -909,33 +920,34 @@
909920 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
910921
911922 if (i < 2)
912
- wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
923
+ wrp_zext(nfp_prog, meta, dst_gpr);
913924
914925 return 0;
915926 }
916927
917928 static int
918
-data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
919
- u8 dst_gpr, u8 size)
929
+data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
930
+ u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
920931 {
921
- return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
922
- size, CMD_MODE_32b);
932
+ return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr),
933
+ offset, size, CMD_MODE_32b);
923934 }
924935
925936 static int
926
-data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
927
- u8 dst_gpr, u8 size)
937
+data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
938
+ u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
928939 {
929940 swreg rega, regb;
930941
931942 addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
932943
933
- return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
944
+ return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb,
934945 size, CMD_MODE_40b_BA);
935946 }
936947
937948 static int
938
-construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
949
+construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
950
+ u16 offset, u16 src, u8 size)
939951 {
940952 swreg tmp_reg;
941953
....@@ -951,10 +963,12 @@
951963 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
952964
953965 /* Load data */
954
- return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
966
+ return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size);
955967 }
956968
957
-static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
969
+static int
970
+construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
971
+ u16 offset, u8 size)
958972 {
959973 swreg tmp_reg;
960974
....@@ -965,7 +979,7 @@
965979
966980 /* Load data */
967981 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
968
- return data_ld(nfp_prog, tmp_reg, 0, size);
982
+ return data_ld(nfp_prog, meta, tmp_reg, 0, size);
969983 }
970984
971985 static int
....@@ -1148,8 +1162,8 @@
11481162 unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
11491163 bool clr_gpr, lmem_step step)
11501164 {
1151
- s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
1152
- bool first = true, last;
1165
+ s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off;
1166
+ bool first = true, narrow_ld, last;
11531167 bool needs_inc = false;
11541168 swreg stack_off_reg;
11551169 u8 prev_gpr = 255;
....@@ -1157,7 +1171,8 @@
11571171 bool lm3 = true;
11581172 int ret;
11591173
1160
- if (meta->ptr_not_const) {
1174
+ if (meta->ptr_not_const ||
1175
+ meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) {
11611176 /* Use of the last encountered ptr_off is OK, they all have
11621177 * the same alignment. Depend on low bits of value being
11631178 * discarded when written to LMaddr register.
....@@ -1194,14 +1209,23 @@
11941209
11951210 needs_inc = true;
11961211 }
1212
+
1213
+ narrow_ld = clr_gpr && size < 8;
1214
+
11971215 if (lm3) {
1216
+ unsigned int nop_cnt;
1217
+
11981218 emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
1199
- /* For size < 4 one slot will be filled by zeroing of upper. */
1200
- wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
1219
+ /* For size < 4 one slot will be filled by zeroing of upper,
1220
+ * but be careful, that zeroing could be eliminated by zext
1221
+ * optimization.
1222
+ */
1223
+ nop_cnt = narrow_ld && meta->flags & FLAG_INSN_DO_ZEXT ? 2 : 3;
1224
+ wrp_nops(nfp_prog, nop_cnt);
12011225 }
12021226
1203
- if (clr_gpr && size < 8)
1204
- wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1227
+ if (narrow_ld)
1228
+ wrp_zext(nfp_prog, meta, gpr);
12051229
12061230 while (size) {
12071231 u32 slice_end;
....@@ -1274,7 +1298,7 @@
12741298 u64 imm = insn->imm; /* sign extend */
12751299
12761300 if (skip) {
1277
- meta->skip = true;
1301
+ meta->flags |= FLAG_INSN_SKIP_NOOP;
12781302 return 0;
12791303 }
12801304
....@@ -1302,9 +1326,10 @@
13021326 enum alu_op alu_op)
13031327 {
13041328 const struct bpf_insn *insn = &meta->insn;
1329
+ u8 dst = insn->dst_reg * 2;
13051330
1306
- wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
1307
- wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1331
+ wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm);
1332
+ wrp_zext(nfp_prog, meta, dst);
13081333
13091334 return 0;
13101335 }
....@@ -1316,7 +1341,7 @@
13161341 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
13171342
13181343 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1319
- wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1344
+ wrp_zext(nfp_prog, meta, dst);
13201345
13211346 return 0;
13221347 }
....@@ -1337,8 +1362,9 @@
13371362
13381363 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
13391364 insn->src_reg * 2, br_mask, insn->off);
1340
- wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
1341
- insn->src_reg * 2 + 1, br_mask, insn->off);
1365
+ if (is_mbpf_jmp64(meta))
1366
+ wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
1367
+ insn->src_reg * 2 + 1, br_mask, insn->off);
13421368
13431369 return 0;
13441370 }
....@@ -1393,13 +1419,15 @@
13931419 else
13941420 emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
13951421
1396
- tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1397
- if (!code->swap)
1398
- emit_alu(nfp_prog, reg_none(),
1399
- reg_a(reg + 1), carry_op, tmp_reg);
1400
- else
1401
- emit_alu(nfp_prog, reg_none(),
1402
- tmp_reg, carry_op, reg_a(reg + 1));
1422
+ if (is_mbpf_jmp64(meta)) {
1423
+ tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1424
+ if (!code->swap)
1425
+ emit_alu(nfp_prog, reg_none(),
1426
+ reg_a(reg + 1), carry_op, tmp_reg);
1427
+ else
1428
+ emit_alu(nfp_prog, reg_none(),
1429
+ tmp_reg, carry_op, reg_a(reg + 1));
1430
+ }
14031431
14041432 emit_br(nfp_prog, code->br_mask, insn->off, 0);
14051433
....@@ -1426,8 +1454,9 @@
14261454 }
14271455
14281456 emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1429
- emit_alu(nfp_prog, reg_none(),
1430
- reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1457
+ if (is_mbpf_jmp64(meta))
1458
+ emit_alu(nfp_prog, reg_none(),
1459
+ reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
14311460 emit_br(nfp_prog, code->br_mask, insn->off, 0);
14321461
14331462 return 0;
....@@ -1701,7 +1730,7 @@
17011730 s64 lm_off;
17021731
17031732 /* We only have to reload LM0 if the key is not at start of stack */
1704
- lm_off = nfp_prog->stack_depth;
1733
+ lm_off = nfp_prog->stack_frame_depth;
17051734 lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
17061735 load_lm_ptr = meta->arg2.var_off || lm_off;
17071736
....@@ -1814,10 +1843,10 @@
18141843 swreg stack_depth_reg;
18151844
18161845 stack_depth_reg = ur_load_imm_any(nfp_prog,
1817
- nfp_prog->stack_depth,
1846
+ nfp_prog->stack_frame_depth,
18181847 stack_imm(nfp_prog));
1819
- emit_alu(nfp_prog, reg_both(dst),
1820
- stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
1848
+ emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog),
1849
+ ALU_OP_ADD, stack_depth_reg);
18211850 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
18221851 } else {
18231852 wrp_reg_mov(nfp_prog, dst, src);
....@@ -1966,6 +1995,9 @@
19661995 */
19671996 static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
19681997 {
1998
+ if (!shift_amt)
1999
+ return 0;
2000
+
19692001 if (shift_amt < 32) {
19702002 emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
19712003 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
....@@ -2078,6 +2110,9 @@
20782110 */
20792111 static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
20802112 {
2113
+ if (!shift_amt)
2114
+ return 0;
2115
+
20812116 if (shift_amt < 32) {
20822117 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
20832118 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
....@@ -2179,6 +2214,9 @@
21792214 */
21802215 static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
21812216 {
2217
+ if (!shift_amt)
2218
+ return 0;
2219
+
21822220 if (shift_amt < 32) {
21832221 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
21842222 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
....@@ -2380,23 +2418,132 @@
23802418 u8 dst = meta->insn.dst_reg * 2;
23812419
23822420 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
2383
- wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2421
+ wrp_zext(nfp_prog, meta, dst);
23842422
2423
+ return 0;
2424
+}
2425
+
2426
+static int
2427
+__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
2428
+ u8 shift_amt)
2429
+{
2430
+ if (shift_amt) {
2431
+ /* Set signedness bit (MSB of result). */
2432
+ emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR,
2433
+ reg_imm(0));
2434
+ emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2435
+ reg_b(dst), SHF_SC_R_SHF, shift_amt);
2436
+ }
2437
+ wrp_zext(nfp_prog, meta, dst);
2438
+
2439
+ return 0;
2440
+}
2441
+
2442
+static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2443
+{
2444
+ const struct bpf_insn *insn = &meta->insn;
2445
+ u64 umin, umax;
2446
+ u8 dst, src;
2447
+
2448
+ dst = insn->dst_reg * 2;
2449
+ umin = meta->umin_src;
2450
+ umax = meta->umax_src;
2451
+ if (umin == umax)
2452
+ return __ashr_imm(nfp_prog, meta, dst, umin);
2453
+
2454
+ src = insn->src_reg * 2;
2455
+ /* NOTE: the first insn will set both indirect shift amount (source A)
2456
+ * and signedness bit (MSB of result).
2457
+ */
2458
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
2459
+ emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2460
+ reg_b(dst), SHF_SC_R_SHF);
2461
+ wrp_zext(nfp_prog, meta, dst);
2462
+
2463
+ return 0;
2464
+}
2465
+
2466
+static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2467
+{
2468
+ const struct bpf_insn *insn = &meta->insn;
2469
+ u8 dst = insn->dst_reg * 2;
2470
+
2471
+ return __ashr_imm(nfp_prog, meta, dst, insn->imm);
2472
+}
2473
+
2474
+static int
2475
+__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
2476
+ u8 shift_amt)
2477
+{
2478
+ if (shift_amt)
2479
+ emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2480
+ reg_b(dst), SHF_SC_R_SHF, shift_amt);
2481
+ wrp_zext(nfp_prog, meta, dst);
2482
+ return 0;
2483
+}
2484
+
2485
+static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2486
+{
2487
+ const struct bpf_insn *insn = &meta->insn;
2488
+ u8 dst = insn->dst_reg * 2;
2489
+
2490
+ return __shr_imm(nfp_prog, meta, dst, insn->imm);
2491
+}
2492
+
2493
+static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2494
+{
2495
+ const struct bpf_insn *insn = &meta->insn;
2496
+ u64 umin, umax;
2497
+ u8 dst, src;
2498
+
2499
+ dst = insn->dst_reg * 2;
2500
+ umin = meta->umin_src;
2501
+ umax = meta->umax_src;
2502
+ if (umin == umax)
2503
+ return __shr_imm(nfp_prog, meta, dst, umin);
2504
+
2505
+ src = insn->src_reg * 2;
2506
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2507
+ emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2508
+ reg_b(dst), SHF_SC_R_SHF);
2509
+ wrp_zext(nfp_prog, meta, dst);
2510
+ return 0;
2511
+}
2512
+
2513
+static int
2514
+__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
2515
+ u8 shift_amt)
2516
+{
2517
+ if (shift_amt)
2518
+ emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2519
+ reg_b(dst), SHF_SC_L_SHF, shift_amt);
2520
+ wrp_zext(nfp_prog, meta, dst);
23852521 return 0;
23862522 }
23872523
23882524 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
23892525 {
23902526 const struct bpf_insn *insn = &meta->insn;
2527
+ u8 dst = insn->dst_reg * 2;
23912528
2392
- if (!insn->imm)
2393
- return 1; /* TODO: zero shift means indirect */
2529
+ return __shl_imm(nfp_prog, meta, dst, insn->imm);
2530
+}
23942531
2395
- emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
2396
- reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
2397
- SHF_SC_L_SHF, insn->imm);
2398
- wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2532
+static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2533
+{
2534
+ const struct bpf_insn *insn = &meta->insn;
2535
+ u64 umin, umax;
2536
+ u8 dst, src;
23992537
2538
+ dst = insn->dst_reg * 2;
2539
+ umin = meta->umin_src;
2540
+ umax = meta->umax_src;
2541
+ if (umin == umax)
2542
+ return __shl_imm(nfp_prog, meta, dst, umin);
2543
+
2544
+ src = insn->src_reg * 2;
2545
+ shl_reg64_lt32_low(nfp_prog, dst, src);
2546
+ wrp_zext(nfp_prog, meta, dst);
24002547 return 0;
24012548 }
24022549
....@@ -2458,34 +2605,34 @@
24582605
24592606 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
24602607 {
2461
- return construct_data_ld(nfp_prog, meta->insn.imm, 1);
2608
+ return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1);
24622609 }
24632610
24642611 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
24652612 {
2466
- return construct_data_ld(nfp_prog, meta->insn.imm, 2);
2613
+ return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2);
24672614 }
24682615
24692616 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
24702617 {
2471
- return construct_data_ld(nfp_prog, meta->insn.imm, 4);
2618
+ return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4);
24722619 }
24732620
24742621 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
24752622 {
2476
- return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2623
+ return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
24772624 meta->insn.src_reg * 2, 1);
24782625 }
24792626
24802627 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
24812628 {
2482
- return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2629
+ return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
24832630 meta->insn.src_reg * 2, 2);
24842631 }
24852632
24862633 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
24872634 {
2488
- return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2635
+ return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
24892636 meta->insn.src_reg * 2, 4);
24902637 }
24912638
....@@ -2505,17 +2652,17 @@
25052652
25062653 switch (meta->insn.off) {
25072654 case offsetof(struct __sk_buff, len):
2508
- if (size != FIELD_SIZEOF(struct __sk_buff, len))
2655
+ if (size != sizeof_field(struct __sk_buff, len))
25092656 return -EOPNOTSUPP;
25102657 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
25112658 break;
25122659 case offsetof(struct __sk_buff, data):
2513
- if (size != FIELD_SIZEOF(struct __sk_buff, data))
2660
+ if (size != sizeof_field(struct __sk_buff, data))
25142661 return -EOPNOTSUPP;
25152662 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
25162663 break;
25172664 case offsetof(struct __sk_buff, data_end):
2518
- if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
2665
+ if (size != sizeof_field(struct __sk_buff, data_end))
25192666 return -EOPNOTSUPP;
25202667 emit_alu(nfp_prog, dst,
25212668 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
....@@ -2536,12 +2683,12 @@
25362683
25372684 switch (meta->insn.off) {
25382685 case offsetof(struct xdp_md, data):
2539
- if (size != FIELD_SIZEOF(struct xdp_md, data))
2686
+ if (size != sizeof_field(struct xdp_md, data))
25402687 return -EOPNOTSUPP;
25412688 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
25422689 break;
25432690 case offsetof(struct xdp_md, data_end):
2544
- if (size != FIELD_SIZEOF(struct xdp_md, data_end))
2691
+ if (size != sizeof_field(struct xdp_md, data_end))
25452692 return -EOPNOTSUPP;
25462693 emit_alu(nfp_prog, dst,
25472694 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
....@@ -2563,7 +2710,7 @@
25632710
25642711 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
25652712
2566
- return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
2713
+ return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2,
25672714 tmp_reg, meta->insn.dst_reg * 2, size);
25682715 }
25692716
....@@ -2575,7 +2722,7 @@
25752722
25762723 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
25772724
2578
- return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
2725
+ return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2,
25792726 tmp_reg, meta->insn.dst_reg * 2, size);
25802727 }
25812728
....@@ -2636,7 +2783,7 @@
26362783 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
26372784
26382785 if (!len_mid) {
2639
- wrp_immed(nfp_prog, dst_hi, 0);
2786
+ wrp_zext(nfp_prog, meta, dst_gpr);
26402787 return 0;
26412788 }
26422789
....@@ -2644,7 +2791,7 @@
26442791
26452792 if (size <= REG_WIDTH) {
26462793 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
2647
- wrp_immed(nfp_prog, dst_hi, 0);
2794
+ wrp_zext(nfp_prog, meta, dst_gpr);
26482795 } else {
26492796 swreg src_hi = reg_xfer(idx + 2);
26502797
....@@ -2675,10 +2822,10 @@
26752822
26762823 if (size < REG_WIDTH) {
26772824 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
2678
- wrp_immed(nfp_prog, dst_hi, 0);
2825
+ wrp_zext(nfp_prog, meta, dst_gpr);
26792826 } else if (size == REG_WIDTH) {
26802827 wrp_mov(nfp_prog, dst_lo, src_lo);
2681
- wrp_immed(nfp_prog, dst_hi, 0);
2828
+ wrp_zext(nfp_prog, meta, dst_gpr);
26822829 } else {
26832830 swreg src_hi = reg_xfer(idx + 1);
26842831
....@@ -3008,30 +3155,37 @@
30083155 return 0;
30093156 }
30103157
3158
+static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3159
+{
3160
+ const struct bpf_insn *insn = &meta->insn;
3161
+ swreg tmp_reg;
3162
+
3163
+ tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog));
3164
+ emit_alu(nfp_prog, reg_none(),
3165
+ reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3166
+ emit_br(nfp_prog, BR_BEQ, insn->off, 0);
3167
+
3168
+ return 0;
3169
+}
3170
+
30113171 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
30123172 {
30133173 const struct bpf_insn *insn = &meta->insn;
30143174 u64 imm = insn->imm; /* sign extend */
3175
+ u8 dst_gpr = insn->dst_reg * 2;
30153176 swreg tmp_reg;
30163177
3017
- if (!imm) {
3018
- meta->skip = true;
3019
- return 0;
3020
- }
3021
-
3022
- if (imm & ~0U) {
3023
- tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3178
+ tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3179
+ emit_alu(nfp_prog, imm_b(nfp_prog),
3180
+ reg_a(dst_gpr), ALU_OP_AND, tmp_reg);
3181
+ /* Upper word of the mask can only be 0 or ~0 from sign extension,
3182
+ * so either ignore it or OR the whole thing in.
3183
+ */
3184
+ if (is_mbpf_jmp64(meta) && imm >> 32) {
30243185 emit_alu(nfp_prog, reg_none(),
3025
- reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
3026
- emit_br(nfp_prog, BR_BNE, insn->off, 0);
3186
+ reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
30273187 }
3028
-
3029
- if (imm >> 32) {
3030
- tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3031
- emit_alu(nfp_prog, reg_none(),
3032
- reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
3033
- emit_br(nfp_prog, BR_BNE, insn->off, 0);
3034
- }
3188
+ emit_br(nfp_prog, BR_BNE, insn->off, 0);
30353189
30363190 return 0;
30373191 }
....@@ -3040,11 +3194,16 @@
30403194 {
30413195 const struct bpf_insn *insn = &meta->insn;
30423196 u64 imm = insn->imm; /* sign extend */
3197
+ bool is_jmp32 = is_mbpf_jmp32(meta);
30433198 swreg tmp_reg;
30443199
30453200 if (!imm) {
3046
- emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
3047
- ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
3201
+ if (is_jmp32)
3202
+ emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
3203
+ reg_b(insn->dst_reg * 2));
3204
+ else
3205
+ emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
3206
+ ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
30483207 emit_br(nfp_prog, BR_BNE, insn->off, 0);
30493208 return 0;
30503209 }
....@@ -3053,6 +3212,9 @@
30533212 emit_alu(nfp_prog, reg_none(),
30543213 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
30553214 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3215
+
3216
+ if (is_jmp32)
3217
+ return 0;
30563218
30573219 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
30583220 emit_alu(nfp_prog, reg_none(),
....@@ -3068,10 +3230,13 @@
30683230
30693231 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
30703232 ALU_OP_XOR, reg_b(insn->src_reg * 2));
3071
- emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
3072
- ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
3073
- emit_alu(nfp_prog, reg_none(),
3074
- imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
3233
+ if (is_mbpf_jmp64(meta)) {
3234
+ emit_alu(nfp_prog, imm_b(nfp_prog),
3235
+ reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
3236
+ reg_b(insn->src_reg * 2 + 1));
3237
+ emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
3238
+ imm_b(nfp_prog));
3239
+ }
30753240 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
30763241
30773242 return 0;
....@@ -3087,7 +3252,93 @@
30873252 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
30883253 }
30893254
3090
-static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3255
+static int
3256
+bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3257
+{
3258
+ u32 ret_tgt, stack_depth, offset_br;
3259
+ swreg tmp_reg;
3260
+
3261
+ stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN);
3262
+ /* Space for saving the return address is accounted for by the callee,
3263
+ * so stack_depth can be zero for the main function.
3264
+ */
3265
+ if (stack_depth) {
3266
+ tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
3267
+ stack_imm(nfp_prog));
3268
+ emit_alu(nfp_prog, stack_reg(nfp_prog),
3269
+ stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg);
3270
+ emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
3271
+ NFP_CSR_ACT_LM_ADDR0);
3272
+ }
3273
+
3274
+ /* Two cases for jumping to the callee:
3275
+ *
3276
+ * - If callee uses and needs to save R6~R9 then:
3277
+ * 1. Put the start offset of the callee into imm_b(). This will
3278
+ * require a fixup step, as we do not necessarily know this
3279
+ * address yet.
3280
+ * 2. Put the return address from the callee to the caller into
3281
+ * register ret_reg().
3282
+ * 3. (After defer slots are consumed) Jump to the subroutine that
3283
+ * pushes the registers to the stack.
3284
+ * The subroutine acts as a trampoline, and returns to the address in
3285
+ * imm_b(), i.e. jumps to the callee.
3286
+ *
3287
+ * - If callee does not need to save R6~R9 then just load return
3288
+ * address to the caller in ret_reg(), and jump to the callee
3289
+ * directly.
3290
+ *
3291
+ * Using ret_reg() to pass the return address to the callee is set here
3292
+ * as a convention. The callee can then push this address onto its
3293
+ * stack frame in its prologue. The advantages of passing the return
3294
+ * address through ret_reg(), instead of pushing it to the stack right
3295
+ * here, are the following:
3296
+ * - It looks cleaner.
3297
+ * - If the called function is called multiple time, we get a lower
3298
+ * program size.
3299
+ * - We save two no-op instructions that should be added just before
3300
+ * the emit_br() when stack depth is not null otherwise.
3301
+ * - If we ever find a register to hold the return address during whole
3302
+ * execution of the callee, we will not have to push the return
3303
+ * address to the stack for leaf functions.
3304
+ */
3305
+ if (!meta->jmp_dst) {
3306
+ pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
3307
+ return -ELOOP;
3308
+ }
3309
+ if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
3310
+ ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
3311
+ emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
3312
+ RELO_BR_GO_CALL_PUSH_REGS);
3313
+ offset_br = nfp_prog_current_offset(nfp_prog);
3314
+ wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
3315
+ } else {
3316
+ ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
3317
+ emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1);
3318
+ offset_br = nfp_prog_current_offset(nfp_prog);
3319
+ }
3320
+ wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);
3321
+
3322
+ if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
3323
+ return -EINVAL;
3324
+
3325
+ if (stack_depth) {
3326
+ tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
3327
+ stack_imm(nfp_prog));
3328
+ emit_alu(nfp_prog, stack_reg(nfp_prog),
3329
+ stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
3330
+ emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
3331
+ NFP_CSR_ACT_LM_ADDR0);
3332
+ wrp_nops(nfp_prog, 3);
3333
+ }
3334
+
3335
+ meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog);
3336
+ meta->num_insns_after_br -= offset_br;
3337
+
3338
+ return 0;
3339
+}
3340
+
3341
+static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
30913342 {
30923343 switch (meta->insn.imm) {
30933344 case BPF_FUNC_xdp_adjust_head:
....@@ -3108,11 +3359,57 @@
31083359 }
31093360 }
31103361
3362
+static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3363
+{
3364
+ if (is_mbpf_pseudo_call(meta))
3365
+ return bpf_to_bpf_call(nfp_prog, meta);
3366
+ else
3367
+ return helper_call(nfp_prog, meta);
3368
+}
3369
+
3370
+static bool nfp_is_main_function(struct nfp_insn_meta *meta)
3371
+{
3372
+ return meta->subprog_idx == 0;
3373
+}
3374
+
31113375 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
31123376 {
31133377 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
31143378
31153379 return 0;
3380
+}
3381
+
3382
+static int
3383
+nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3384
+{
3385
+ if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
3386
+ /* Pop R6~R9 to the stack via related subroutine.
3387
+ * We loaded the return address to the caller into ret_reg().
3388
+ * This means that the subroutine does not come back here, we
3389
+ * make it jump back to the subprogram caller directly!
3390
+ */
3391
+ emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
3392
+ RELO_BR_GO_CALL_POP_REGS);
3393
+ /* Pop return address from the stack. */
3394
+ wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3395
+ } else {
3396
+ /* Pop return address from the stack. */
3397
+ wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3398
+ /* Jump back to caller if no callee-saved registers were used
3399
+ * by the subprogram.
3400
+ */
3401
+ emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
3402
+ }
3403
+
3404
+ return 0;
3405
+}
3406
+
3407
+static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3408
+{
3409
+ if (nfp_is_main_function(meta))
3410
+ return goto_out(nfp_prog, meta);
3411
+ else
3412
+ return nfp_subprog_epilogue(nfp_prog, meta);
31163413 }
31173414
31183415 static const instr_cb_t instr_cb[256] = {
....@@ -3156,7 +3453,12 @@
31563453 [BPF_ALU | BPF_DIV | BPF_X] = div_reg,
31573454 [BPF_ALU | BPF_DIV | BPF_K] = div_imm,
31583455 [BPF_ALU | BPF_NEG] = neg_reg,
3456
+ [BPF_ALU | BPF_LSH | BPF_X] = shl_reg,
31593457 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
3458
+ [BPF_ALU | BPF_RSH | BPF_X] = shr_reg,
3459
+ [BPF_ALU | BPF_RSH | BPF_K] = shr_imm,
3460
+ [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg,
3461
+ [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm,
31603462 [BPF_ALU | BPF_END | BPF_X] = end_reg32,
31613463 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
31623464 [BPF_LD | BPF_ABS | BPF_B] = data_ld1,
....@@ -3202,22 +3504,62 @@
32023504 [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
32033505 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
32043506 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
3507
+ [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm,
3508
+ [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm,
3509
+ [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm,
3510
+ [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm,
3511
+ [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm,
3512
+ [BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm,
3513
+ [BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm,
3514
+ [BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm,
3515
+ [BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm,
3516
+ [BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm,
3517
+ [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm,
3518
+ [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg,
3519
+ [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg,
3520
+ [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg,
3521
+ [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg,
3522
+ [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg,
3523
+ [BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg,
3524
+ [BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg,
3525
+ [BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg,
3526
+ [BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg,
3527
+ [BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg,
3528
+ [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg,
32053529 [BPF_JMP | BPF_CALL] = call,
3206
- [BPF_JMP | BPF_EXIT] = goto_out,
3530
+ [BPF_JMP | BPF_EXIT] = jmp_exit,
32073531 };
32083532
32093533 /* --- Assembler logic --- */
3534
+static int
3535
+nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
3536
+ struct nfp_insn_meta *jmp_dst, u32 br_idx)
3537
+{
3538
+ if (immed_get_value(nfp_prog->prog[br_idx + 1])) {
3539
+ pr_err("BUG: failed to fix up callee register saving\n");
3540
+ return -EINVAL;
3541
+ }
3542
+
3543
+ immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off);
3544
+
3545
+ return 0;
3546
+}
3547
+
32103548 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
32113549 {
32123550 struct nfp_insn_meta *meta, *jmp_dst;
32133551 u32 idx, br_idx;
3552
+ int err;
32143553
32153554 list_for_each_entry(meta, &nfp_prog->insns, l) {
3216
- if (meta->skip)
3555
+ if (meta->flags & FLAG_INSN_SKIP_MASK)
32173556 continue;
3218
- if (meta->insn.code == (BPF_JMP | BPF_CALL))
3557
+ if (!is_mbpf_jmp(meta))
32193558 continue;
3220
- if (BPF_CLASS(meta->insn.code) != BPF_JMP)
3559
+ if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
3560
+ !nfp_is_main_function(meta))
3561
+ continue;
3562
+ if (is_mbpf_helper_call(meta))
32213563 continue;
32223564
32233565 if (list_is_last(&meta->l, &nfp_prog->insns))
....@@ -3225,14 +3567,26 @@
32253567 else
32263568 br_idx = list_next_entry(meta, l)->off - 1;
32273569
3570
+ /* For BPF-to-BPF function call, a stack adjustment sequence is
3571
+ * generated after the return instruction. Therefore, we must
3572
+ * withdraw the length of this sequence to have br_idx pointing
3573
+ * to where the "branch" NFP instruction is expected to be.
3574
+ */
3575
+ if (is_mbpf_pseudo_call(meta))
3576
+ br_idx -= meta->num_insns_after_br;
3577
+
32283578 if (!nfp_is_br(nfp_prog->prog[br_idx])) {
32293579 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
32303580 br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
32313581 return -ELOOP;
32323582 }
3583
+
3584
+ if (meta->insn.code == (BPF_JMP | BPF_EXIT))
3585
+ continue;
3586
+
32333587 /* Leave special branches for later */
32343588 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
3235
- RELO_BR_REL)
3589
+ RELO_BR_REL && !is_mbpf_pseudo_call(meta))
32363590 continue;
32373591
32383592 if (!meta->jmp_dst) {
....@@ -3242,10 +3596,22 @@
32423596
32433597 jmp_dst = meta->jmp_dst;
32443598
3245
- if (jmp_dst->skip) {
3599
+ if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) {
32463600 pr_err("Branch landing on removed instruction!!\n");
32473601 return -ELOOP;
32483602 }
3603
+
3604
+ if (is_mbpf_pseudo_call(meta) &&
3605
+ nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
3606
+ err = nfp_fixup_immed_relo(nfp_prog, meta,
3607
+ jmp_dst, br_idx);
3608
+ if (err)
3609
+ return err;
3610
+ }
3611
+
3612
+ if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
3613
+ RELO_BR_REL)
3614
+ continue;
32493615
32503616 for (idx = meta->off; idx <= br_idx; idx++) {
32513617 if (!nfp_is_br(nfp_prog->prog[idx]))
....@@ -3262,6 +3628,27 @@
32623628 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
32633629 emit_alu(nfp_prog, plen_reg(nfp_prog),
32643630 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
3631
+}
3632
+
3633
/* Emit the prologue of a BPF subprogram: spill the return address left in
 * ret_reg() by the caller into LM slot 0 of the stack frame.  The
 * epilogue (and the register pop subroutine) reload it from there.
 */
static void
nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	/* Save return address into the stack. */
	wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog));
}
3639
+
3640
+static void
3641
+nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3642
+{
3643
+ unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth;
3644
+
3645
+ nfp_prog->stack_frame_depth = round_up(depth, 4);
3646
+ nfp_subprog_prologue(nfp_prog, meta);
3647
+}
3648
+
3649
+bool nfp_is_subprog_start(struct nfp_insn_meta *meta)
3650
+{
3651
+ return meta->flags & FLAG_INSN_IS_SUBPROG_START;
32653652 }
32663653
32673654 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
....@@ -3354,6 +3741,67 @@
33543741 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
33553742 }
33563743
3744
+static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
3745
+{
3746
+ unsigned int idx;
3747
+
3748
+ for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
3749
+ if (nfp_prog->subprog[idx].needs_reg_push)
3750
+ return true;
3751
+
3752
+ return false;
3753
+}
3754
+
3755
+static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
3756
+{
3757
+ u8 reg;
3758
+
3759
+ /* Subroutine: Save all callee saved registers (R6 ~ R9).
3760
+ * imm_b() holds the return address.
3761
+ */
3762
+ nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog);
3763
+ for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
3764
+ u8 adj = (reg - BPF_REG_0) * 2;
3765
+ u8 idx = (reg - BPF_REG_6) * 2;
3766
+
3767
+ /* The first slot in the stack frame is used to push the return
3768
+ * address in bpf_to_bpf_call(), start just after.
3769
+ */
3770
+ wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj));
3771
+
3772
+ if (reg == BPF_REG_8)
3773
+ /* Prepare to jump back, last 3 insns use defer slots */
3774
+ emit_rtn(nfp_prog, imm_b(nfp_prog), 3);
3775
+
3776
+ wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1));
3777
+ }
3778
+}
3779
+
3780
+static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog)
3781
+{
3782
+ u8 reg;
3783
+
3784
+ /* Subroutine: Restore all callee saved registers (R6 ~ R9).
3785
+ * ret_reg() holds the return address.
3786
+ */
3787
+ nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog);
3788
+ for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
3789
+ u8 adj = (reg - BPF_REG_0) * 2;
3790
+ u8 idx = (reg - BPF_REG_6) * 2;
3791
+
3792
+ /* The first slot in the stack frame holds the return address,
3793
+ * start popping just after that.
3794
+ */
3795
+ wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx));
3796
+
3797
+ if (reg == BPF_REG_8)
3798
+ /* Prepare to jump back, last 3 insns use defer slots */
3799
+ emit_rtn(nfp_prog, ret_reg(nfp_prog), 3);
3800
+
3801
+ wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1));
3802
+ }
3803
+}
3804
+
33573805 static void nfp_outro(struct nfp_prog *nfp_prog)
33583806 {
33593807 switch (nfp_prog->type) {
....@@ -3366,12 +3814,22 @@
33663814 default:
33673815 WARN_ON(1);
33683816 }
3817
+
3818
+ if (!nfp_prog_needs_callee_reg_save(nfp_prog))
3819
+ return;
3820
+
3821
+ nfp_push_callee_registers(nfp_prog);
3822
+ nfp_pop_callee_registers(nfp_prog);
33693823 }
33703824
33713825 static int nfp_translate(struct nfp_prog *nfp_prog)
33723826 {
33733827 struct nfp_insn_meta *meta;
3828
+ unsigned int depth;
33743829 int err;
3830
+
3831
+ depth = nfp_prog->subprog[0].stack_depth;
3832
+ nfp_prog->stack_frame_depth = round_up(depth, 4);
33753833
33763834 nfp_intro(nfp_prog);
33773835 if (nfp_prog->error)
....@@ -3382,7 +3840,13 @@
33823840
33833841 meta->off = nfp_prog_current_offset(nfp_prog);
33843842
3385
- if (meta->skip) {
3843
+ if (nfp_is_subprog_start(meta)) {
3844
+ nfp_start_subprog(nfp_prog, meta);
3845
+ if (nfp_prog->error)
3846
+ return nfp_prog->error;
3847
+ }
3848
+
3849
+ if (meta->flags & FLAG_INSN_SKIP_MASK) {
33863850 nfp_prog->n_translated++;
33873851 continue;
33883852 }
....@@ -3430,10 +3894,10 @@
34303894 /* Programs start with R6 = R1 but we ignore the skb pointer */
34313895 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
34323896 insn.src_reg == 1 && insn.dst_reg == 6)
3433
- meta->skip = true;
3897
+ meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
34343898
34353899 /* Return as soon as something doesn't match */
3436
- if (!meta->skip)
3900
+ if (!(meta->flags & FLAG_INSN_SKIP_MASK))
34373901 return;
34383902 }
34393903 }
....@@ -3448,19 +3912,17 @@
34483912 list_for_each_entry(meta, &nfp_prog->insns, l) {
34493913 struct bpf_insn insn = meta->insn;
34503914
3451
- if (meta->skip)
3915
+ if (meta->flags & FLAG_INSN_SKIP_MASK)
34523916 continue;
34533917
3454
- if (BPF_CLASS(insn.code) != BPF_ALU &&
3455
- BPF_CLASS(insn.code) != BPF_ALU64 &&
3456
- BPF_CLASS(insn.code) != BPF_JMP)
3918
+ if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta))
34573919 continue;
34583920 if (BPF_SRC(insn.code) != BPF_K)
34593921 continue;
34603922 if (insn.imm >= 0)
34613923 continue;
34623924
3463
- if (BPF_CLASS(insn.code) == BPF_JMP) {
3925
+ if (is_mbpf_jmp(meta)) {
34643926 switch (BPF_OP(insn.code)) {
34653927 case BPF_JGE:
34663928 case BPF_JSGE:
....@@ -3490,7 +3952,7 @@
34903952 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
34913953 {
34923954 struct nfp_insn_meta *meta1, *meta2;
3493
- const s32 exp_mask[] = {
3955
+ static const s32 exp_mask[] = {
34943956 [BPF_B] = 0x000000ffU,
34953957 [BPF_H] = 0x0000ffffU,
34963958 [BPF_W] = 0xffffffffU,
....@@ -3522,7 +3984,7 @@
35223984 if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
35233985 continue;
35243986
3525
- meta2->skip = true;
3987
+ meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
35263988 }
35273989 }
35283990
....@@ -3562,8 +4024,8 @@
35624024 meta3->flags & FLAG_INSN_IS_JUMP_DST)
35634025 continue;
35644026
3565
- meta2->skip = true;
3566
- meta3->skip = true;
4027
+ meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4028
+ meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
35674029 }
35684030 }
35694031
....@@ -3758,7 +4220,8 @@
37584220 }
37594221
37604222 head_ld_meta->paired_st = &head_st_meta->insn;
3761
- head_st_meta->skip = true;
4223
+ head_st_meta->flags |=
4224
+ FLAG_INSN_SKIP_PREC_DEPENDENT;
37624225 } else {
37634226 head_ld_meta->ldst_gather_len = 0;
37644227 }
....@@ -3791,8 +4254,8 @@
37914254 head_ld_meta = meta1;
37924255 head_st_meta = meta2;
37934256 } else {
3794
- meta1->skip = true;
3795
- meta2->skip = true;
4257
+ meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4258
+ meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
37964259 }
37974260
37984261 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
....@@ -3817,7 +4280,7 @@
38174280 if (meta->flags & FLAG_INSN_IS_JUMP_DST)
38184281 cache_avail = false;
38194282
3820
- if (meta->skip)
4283
+ if (meta->flags & FLAG_INSN_SKIP_MASK)
38214284 continue;
38224285
38234286 insn = &meta->insn;
....@@ -3903,7 +4366,7 @@
39034366 }
39044367
39054368 list_for_each_entry(meta, &nfp_prog->insns, l) {
3906
- if (meta->skip)
4369
+ if (meta->flags & FLAG_INSN_SKIP_MASK)
39074370 continue;
39084371
39094372 if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
....@@ -3939,7 +4402,8 @@
39394402 u32 id;
39404403
39414404 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3942
- if (meta1->skip || meta2->skip)
4405
+ if (meta1->flags & FLAG_INSN_SKIP_MASK ||
4406
+ meta2->flags & FLAG_INSN_SKIP_MASK)
39434407 continue;
39444408
39454409 if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
....@@ -4018,26 +4482,41 @@
40184482 return ret;
40194483 }
40204484
4021
-void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
4485
+void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog)
40224486 {
40234487 struct nfp_insn_meta *meta;
40244488
40254489 /* Another pass to record jump information. */
40264490 list_for_each_entry(meta, &nfp_prog->insns, l) {
4491
+ struct nfp_insn_meta *dst_meta;
40274492 u64 code = meta->insn.code;
4493
+ unsigned int dst_idx;
4494
+ bool pseudo_call;
40284495
4029
- if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
4030
- BPF_OP(code) != BPF_CALL) {
4031
- struct nfp_insn_meta *dst_meta;
4032
- unsigned short dst_indx;
4496
+ if (!is_mbpf_jmp(meta))
4497
+ continue;
4498
+ if (BPF_OP(code) == BPF_EXIT)
4499
+ continue;
4500
+ if (is_mbpf_helper_call(meta))
4501
+ continue;
40334502
4034
- dst_indx = meta->n + 1 + meta->insn.off;
4035
- dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
4036
- cnt);
4503
+ /* If opcode is BPF_CALL at this point, this can only be a
4504
+ * BPF-to-BPF call (a.k.a pseudo call).
4505
+ */
4506
+ pseudo_call = BPF_OP(code) == BPF_CALL;
40374507
4038
- meta->jmp_dst = dst_meta;
4039
- dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
4040
- }
4508
+ if (pseudo_call)
4509
+ dst_idx = meta->n + 1 + meta->insn.imm;
4510
+ else
4511
+ dst_idx = meta->n + 1 + meta->insn.off;
4512
+
4513
+ dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx);
4514
+
4515
+ if (pseudo_call)
4516
+ dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START;
4517
+
4518
+ dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
4519
+ meta->jmp_dst = dst_meta;
40414520 }
40424521 }
40434522
....@@ -4060,6 +4539,7 @@
40604539 for (i = 0; i < nfp_prog->prog_len; i++) {
40614540 enum nfp_relo_type special;
40624541 u32 val;
4542
+ u16 off;
40634543
40644544 special = FIELD_GET(OP_RELO_TYPE, prog[i]);
40654545 switch (special) {
....@@ -4076,6 +4556,24 @@
40764556 br_set_offset(&prog[i],
40774557 nfp_prog->tgt_abort + bv->start_off);
40784558 break;
4559
+ case RELO_BR_GO_CALL_PUSH_REGS:
4560
+ if (!nfp_prog->tgt_call_push_regs) {
4561
+ pr_err("BUG: failed to detect subprogram registers needs\n");
4562
+ err = -EINVAL;
4563
+ goto err_free_prog;
4564
+ }
4565
+ off = nfp_prog->tgt_call_push_regs + bv->start_off;
4566
+ br_set_offset(&prog[i], off);
4567
+ break;
4568
+ case RELO_BR_GO_CALL_POP_REGS:
4569
+ if (!nfp_prog->tgt_call_pop_regs) {
4570
+ pr_err("BUG: failed to detect subprogram registers needs\n");
4571
+ err = -EINVAL;
4572
+ goto err_free_prog;
4573
+ }
4574
+ off = nfp_prog->tgt_call_pop_regs + bv->start_off;
4575
+ br_set_offset(&prog[i], off);
4576
+ break;
40794577 case RELO_BR_NEXT_PKT:
40804578 br_set_offset(&prog[i], bv->tgt_done);
40814579 break;