| .. | .. |
|---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * x86_64/AVX2/AES-NI assembler implementation of Camellia |
|---|
| 3 | 4 | * |
|---|
| 4 | 5 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
|---|
| 5 | | - * |
|---|
| 6 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 7 | | - * it under the terms of the GNU General Public License as published by |
|---|
| 8 | | - * the Free Software Foundation; either version 2 of the License, or |
|---|
| 9 | | - * (at your option) any later version. |
|---|
| 10 | | - * |
|---|
| 11 | 6 | */ |
|---|
| 12 | 7 | |
|---|
| 13 | 8 | #include <linux/linkage.h> |
|---|
| .. | .. |
|---|
| 228 | 223 | * larger and would only marginally faster. |
|---|
| 229 | 224 | */ |
|---|
| 230 | 225 | .align 8 |
|---|
| 231 | | -roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: |
|---|
| 226 | +SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
|---|
| 232 | 227 | roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, |
|---|
| 233 | 228 | %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, |
|---|
| 234 | 229 | %rcx, (%r9)); |
|---|
| 235 | | - ret; |
|---|
| 236 | | -ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
|---|
| 230 | + RET; |
|---|
| 231 | +SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
|---|
| 237 | 232 | |
|---|
| 238 | 233 | .align 8 |
|---|
| 239 | | -roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: |
|---|
| 234 | +SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
|---|
| 240 | 235 | roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, |
|---|
| 241 | 236 | %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, |
|---|
| 242 | 237 | %rax, (%r9)); |
|---|
| 243 | | - ret; |
|---|
| 244 | | -ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
|---|
| 238 | + RET; |
|---|
| 239 | +SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
|---|
| 245 | 240 | |
|---|
| 246 | 241 | /* |
|---|
| 247 | 242 | * IN/OUT: |
|---|
| .. | .. |
|---|
| 765 | 760 | .text |
|---|
| 766 | 761 | |
|---|
| 767 | 762 | .align 8 |
|---|
| 768 | | -__camellia_enc_blk32: |
|---|
| 763 | +SYM_FUNC_START_LOCAL(__camellia_enc_blk32) |
|---|
| 769 | 764 | /* input: |
|---|
| 770 | 765 | * %rdi: ctx, CTX |
|---|
| 771 | 766 | * %rax: temporary storage, 512 bytes |
|---|
| .. | .. |
|---|
| 830 | 825 | %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); |
|---|
| 831 | 826 | |
|---|
| 832 | 827 | FRAME_END |
|---|
| 833 | | - ret; |
|---|
| 828 | + RET; |
|---|
| 834 | 829 | |
|---|
| 835 | 830 | .align 8 |
|---|
| 836 | 831 | .Lenc_max32: |
|---|
| .. | .. |
|---|
| 849 | 844 | %ymm15, %rax, %rcx, 24); |
|---|
| 850 | 845 | |
|---|
| 851 | 846 | jmp .Lenc_done; |
|---|
| 852 | | -ENDPROC(__camellia_enc_blk32) |
|---|
| 847 | +SYM_FUNC_END(__camellia_enc_blk32) |
|---|
| 853 | 848 | |
|---|
| 854 | 849 | .align 8 |
|---|
| 855 | | -__camellia_dec_blk32: |
|---|
| 850 | +SYM_FUNC_START_LOCAL(__camellia_dec_blk32) |
|---|
| 856 | 851 | /* input: |
|---|
| 857 | 852 | * %rdi: ctx, CTX |
|---|
| 858 | 853 | * %rax: temporary storage, 512 bytes |
|---|
| .. | .. |
|---|
| 917 | 912 | %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); |
|---|
| 918 | 913 | |
|---|
| 919 | 914 | FRAME_END |
|---|
| 920 | | - ret; |
|---|
| 915 | + RET; |
|---|
| 921 | 916 | |
|---|
| 922 | 917 | .align 8 |
|---|
| 923 | 918 | .Ldec_max32: |
|---|
| .. | .. |
|---|
| 934 | 929 | ((key_table + (24) * 8) + 4)(CTX)); |
|---|
| 935 | 930 | |
|---|
| 936 | 931 | jmp .Ldec_max24; |
|---|
| 937 | | -ENDPROC(__camellia_dec_blk32) |
|---|
| 932 | +SYM_FUNC_END(__camellia_dec_blk32) |
|---|
| 938 | 933 | |
|---|
| 939 | | -ENTRY(camellia_ecb_enc_32way) |
|---|
| 934 | +SYM_FUNC_START(camellia_ecb_enc_32way) |
|---|
| 940 | 935 | /* input: |
|---|
| 941 | 936 | * %rdi: ctx, CTX |
|---|
| 942 | 937 | * %rsi: dst (32 blocks) |
|---|
| .. | .. |
|---|
| 962 | 957 | vzeroupper; |
|---|
| 963 | 958 | |
|---|
| 964 | 959 | FRAME_END |
|---|
| 965 | | - ret; |
|---|
| 966 | | -ENDPROC(camellia_ecb_enc_32way) |
|---|
| 960 | + RET; |
|---|
| 961 | +SYM_FUNC_END(camellia_ecb_enc_32way) |
|---|
| 967 | 962 | |
|---|
| 968 | | -ENTRY(camellia_ecb_dec_32way) |
|---|
| 963 | +SYM_FUNC_START(camellia_ecb_dec_32way) |
|---|
| 969 | 964 | /* input: |
|---|
| 970 | 965 | * %rdi: ctx, CTX |
|---|
| 971 | 966 | * %rsi: dst (32 blocks) |
|---|
| .. | .. |
|---|
| 996 | 991 | vzeroupper; |
|---|
| 997 | 992 | |
|---|
| 998 | 993 | FRAME_END |
|---|
| 999 | | - ret; |
|---|
| 1000 | | -ENDPROC(camellia_ecb_dec_32way) |
|---|
| 994 | + RET; |
|---|
| 995 | +SYM_FUNC_END(camellia_ecb_dec_32way) |
|---|
| 1001 | 996 | |
|---|
| 1002 | | -ENTRY(camellia_cbc_dec_32way) |
|---|
| 997 | +SYM_FUNC_START(camellia_cbc_dec_32way) |
|---|
| 1003 | 998 | /* input: |
|---|
| 1004 | 999 | * %rdi: ctx, CTX |
|---|
| 1005 | 1000 | * %rsi: dst (32 blocks) |
|---|
| .. | .. |
|---|
| 1064 | 1059 | vzeroupper; |
|---|
| 1065 | 1060 | |
|---|
| 1066 | 1061 | FRAME_END |
|---|
| 1067 | | - ret; |
|---|
| 1068 | | -ENDPROC(camellia_cbc_dec_32way) |
|---|
| 1062 | + RET; |
|---|
| 1063 | +SYM_FUNC_END(camellia_cbc_dec_32way) |
|---|
| 1069 | 1064 | |
|---|
| 1070 | 1065 | #define inc_le128(x, minus_one, tmp) \ |
|---|
| 1071 | 1066 | vpcmpeqq minus_one, x, tmp; \ |
|---|
| .. | .. |
|---|
| 1081 | 1076 | vpslldq $8, tmp1, tmp1; \ |
|---|
| 1082 | 1077 | vpsubq tmp1, x, x; |
|---|
| 1083 | 1078 | |
|---|
| 1084 | | -ENTRY(camellia_ctr_32way) |
|---|
| 1079 | +SYM_FUNC_START(camellia_ctr_32way) |
|---|
| 1085 | 1080 | /* input: |
|---|
| 1086 | 1081 | * %rdi: ctx, CTX |
|---|
| 1087 | 1082 | * %rsi: dst (32 blocks) |
|---|
| .. | .. |
|---|
| 1204 | 1199 | vzeroupper; |
|---|
| 1205 | 1200 | |
|---|
| 1206 | 1201 | FRAME_END |
|---|
| 1207 | | - ret; |
|---|
| 1208 | | -ENDPROC(camellia_ctr_32way) |
|---|
| 1202 | + RET; |
|---|
| 1203 | +SYM_FUNC_END(camellia_ctr_32way) |
|---|
| 1209 | 1204 | |
|---|
| 1210 | 1205 | #define gf128mul_x_ble(iv, mask, tmp) \ |
|---|
| 1211 | 1206 | vpsrad $31, iv, tmp; \ |
|---|
| .. | .. |
|---|
| 1227 | 1222 | vpxor tmp1, iv, iv; |
|---|
| 1228 | 1223 | |
|---|
| 1229 | 1224 | .align 8 |
|---|
| 1230 | | -camellia_xts_crypt_32way: |
|---|
| 1225 | +SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) |
|---|
| 1231 | 1226 | /* input: |
|---|
| 1232 | 1227 | * %rdi: ctx, CTX |
|---|
| 1233 | 1228 | * %rsi: dst (32 blocks) |
|---|
| .. | .. |
|---|
| 1344 | 1339 | vpxor 14 * 32(%rax), %ymm15, %ymm14; |
|---|
| 1345 | 1340 | vpxor 15 * 32(%rax), %ymm15, %ymm15; |
|---|
| 1346 | 1341 | |
|---|
| 1347 | | - CALL_NOSPEC %r9; |
|---|
| 1342 | + CALL_NOSPEC r9; |
|---|
| 1348 | 1343 | |
|---|
| 1349 | 1344 | addq $(16 * 32), %rsp; |
|---|
| 1350 | 1345 | |
|---|
| .. | .. |
|---|
| 1371 | 1366 | vzeroupper; |
|---|
| 1372 | 1367 | |
|---|
| 1373 | 1368 | FRAME_END |
|---|
| 1374 | | - ret; |
|---|
| 1375 | | -ENDPROC(camellia_xts_crypt_32way) |
|---|
| 1369 | + RET; |
|---|
| 1370 | +SYM_FUNC_END(camellia_xts_crypt_32way) |
|---|
| 1376 | 1371 | |
|---|
| 1377 | | -ENTRY(camellia_xts_enc_32way) |
|---|
| 1372 | +SYM_FUNC_START(camellia_xts_enc_32way) |
|---|
| 1378 | 1373 | /* input: |
|---|
| 1379 | 1374 | * %rdi: ctx, CTX |
|---|
| 1380 | 1375 | * %rsi: dst (32 blocks) |
|---|
| .. | .. |
|---|
| 1387 | 1382 | leaq __camellia_enc_blk32, %r9; |
|---|
| 1388 | 1383 | |
|---|
| 1389 | 1384 | jmp camellia_xts_crypt_32way; |
|---|
| 1390 | | -ENDPROC(camellia_xts_enc_32way) |
|---|
| 1385 | +SYM_FUNC_END(camellia_xts_enc_32way) |
|---|
| 1391 | 1386 | |
|---|
| 1392 | | -ENTRY(camellia_xts_dec_32way) |
|---|
| 1387 | +SYM_FUNC_START(camellia_xts_dec_32way) |
|---|
| 1393 | 1388 | /* input: |
|---|
| 1394 | 1389 | * %rdi: ctx, CTX |
|---|
| 1395 | 1390 | * %rsi: dst (32 blocks) |
|---|
| .. | .. |
|---|
| 1405 | 1400 | leaq __camellia_dec_blk32, %r9; |
|---|
| 1406 | 1401 | |
|---|
| 1407 | 1402 | jmp camellia_xts_crypt_32way; |
|---|
| 1408 | | -ENDPROC(camellia_xts_dec_32way) |
|---|
| 1403 | +SYM_FUNC_END(camellia_xts_dec_32way) |
|---|