.. | .. |
---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ |
---|
1 | 2 | /* |
---|
2 | 3 | * x86_64/AVX2/AES-NI assembler implementation of Camellia |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
---|
5 | | - * |
---|
6 | | - * This program is free software; you can redistribute it and/or modify |
---|
7 | | - * it under the terms of the GNU General Public License as published by |
---|
8 | | - * the Free Software Foundation; either version 2 of the License, or |
---|
9 | | - * (at your option) any later version. |
---|
10 | | - * |
---|
11 | 6 | */ |
---|
12 | 7 | |
---|
13 | 8 | #include <linux/linkage.h> |
---|
.. | .. |
---|
228 | 223 | * larger and would only be marginally faster. |
---|
229 | 224 | */ |
---|
230 | 225 | .align 8 |
---|
231 | | -roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: |
---|
| 226 | +SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
---|
232 | 227 | roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, |
---|
233 | 228 | %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, |
---|
234 | 229 | %rcx, (%r9)); |
---|
235 | | - ret; |
---|
236 | | -ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
---|
| 230 | + RET; |
---|
| 231 | +SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
---|
237 | 232 | |
---|
238 | 233 | .align 8 |
---|
239 | | -roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: |
---|
| 234 | +SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
---|
240 | 235 | roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, |
---|
241 | 236 | %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, |
---|
242 | 237 | %rax, (%r9)); |
---|
243 | | - ret; |
---|
244 | | -ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
---|
| 238 | + RET; |
---|
| 239 | +SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
---|
245 | 240 | |
---|
246 | 241 | /* |
---|
247 | 242 | * IN/OUT: |
---|
.. | .. |
---|
765 | 760 | .text |
---|
766 | 761 | |
---|
767 | 762 | .align 8 |
---|
768 | | -__camellia_enc_blk32: |
---|
| 763 | +SYM_FUNC_START_LOCAL(__camellia_enc_blk32) |
---|
769 | 764 | /* input: |
---|
770 | 765 | * %rdi: ctx, CTX |
---|
771 | 766 | * %rax: temporary storage, 512 bytes |
---|
.. | .. |
---|
830 | 825 | %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); |
---|
831 | 826 | |
---|
832 | 827 | FRAME_END |
---|
833 | | - ret; |
---|
| 828 | + RET; |
---|
834 | 829 | |
---|
835 | 830 | .align 8 |
---|
836 | 831 | .Lenc_max32: |
---|
.. | .. |
---|
849 | 844 | %ymm15, %rax, %rcx, 24); |
---|
850 | 845 | |
---|
851 | 846 | jmp .Lenc_done; |
---|
852 | | -ENDPROC(__camellia_enc_blk32) |
---|
| 847 | +SYM_FUNC_END(__camellia_enc_blk32) |
---|
853 | 848 | |
---|
854 | 849 | .align 8 |
---|
855 | | -__camellia_dec_blk32: |
---|
| 850 | +SYM_FUNC_START_LOCAL(__camellia_dec_blk32) |
---|
856 | 851 | /* input: |
---|
857 | 852 | * %rdi: ctx, CTX |
---|
858 | 853 | * %rax: temporary storage, 512 bytes |
---|
.. | .. |
---|
917 | 912 | %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); |
---|
918 | 913 | |
---|
919 | 914 | FRAME_END |
---|
920 | | - ret; |
---|
| 915 | + RET; |
---|
921 | 916 | |
---|
922 | 917 | .align 8 |
---|
923 | 918 | .Ldec_max32: |
---|
.. | .. |
---|
934 | 929 | ((key_table + (24) * 8) + 4)(CTX)); |
---|
935 | 930 | |
---|
936 | 931 | jmp .Ldec_max24; |
---|
937 | | -ENDPROC(__camellia_dec_blk32) |
---|
| 932 | +SYM_FUNC_END(__camellia_dec_blk32) |
---|
938 | 933 | |
---|
939 | | -ENTRY(camellia_ecb_enc_32way) |
---|
| 934 | +SYM_FUNC_START(camellia_ecb_enc_32way) |
---|
940 | 935 | /* input: |
---|
941 | 936 | * %rdi: ctx, CTX |
---|
942 | 937 | * %rsi: dst (32 blocks) |
---|
.. | .. |
---|
962 | 957 | vzeroupper; |
---|
963 | 958 | |
---|
964 | 959 | FRAME_END |
---|
965 | | - ret; |
---|
966 | | -ENDPROC(camellia_ecb_enc_32way) |
---|
| 960 | + RET; |
---|
| 961 | +SYM_FUNC_END(camellia_ecb_enc_32way) |
---|
967 | 962 | |
---|
968 | | -ENTRY(camellia_ecb_dec_32way) |
---|
| 963 | +SYM_FUNC_START(camellia_ecb_dec_32way) |
---|
969 | 964 | /* input: |
---|
970 | 965 | * %rdi: ctx, CTX |
---|
971 | 966 | * %rsi: dst (32 blocks) |
---|
.. | .. |
---|
996 | 991 | vzeroupper; |
---|
997 | 992 | |
---|
998 | 993 | FRAME_END |
---|
999 | | - ret; |
---|
1000 | | -ENDPROC(camellia_ecb_dec_32way) |
---|
| 994 | + RET; |
---|
| 995 | +SYM_FUNC_END(camellia_ecb_dec_32way) |
---|
1001 | 996 | |
---|
1002 | | -ENTRY(camellia_cbc_dec_32way) |
---|
| 997 | +SYM_FUNC_START(camellia_cbc_dec_32way) |
---|
1003 | 998 | /* input: |
---|
1004 | 999 | * %rdi: ctx, CTX |
---|
1005 | 1000 | * %rsi: dst (32 blocks) |
---|
.. | .. |
---|
1064 | 1059 | vzeroupper; |
---|
1065 | 1060 | |
---|
1066 | 1061 | FRAME_END |
---|
1067 | | - ret; |
---|
1068 | | -ENDPROC(camellia_cbc_dec_32way) |
---|
| 1062 | + RET; |
---|
| 1063 | +SYM_FUNC_END(camellia_cbc_dec_32way) |
---|
1069 | 1064 | |
---|
1070 | 1065 | #define inc_le128(x, minus_one, tmp) \ |
---|
1071 | 1066 | vpcmpeqq minus_one, x, tmp; \ |
---|
.. | .. |
---|
1081 | 1076 | vpslldq $8, tmp1, tmp1; \ |
---|
1082 | 1077 | vpsubq tmp1, x, x; |
---|
1083 | 1078 | |
---|
1084 | | -ENTRY(camellia_ctr_32way) |
---|
| 1079 | +SYM_FUNC_START(camellia_ctr_32way) |
---|
1085 | 1080 | /* input: |
---|
1086 | 1081 | * %rdi: ctx, CTX |
---|
1087 | 1082 | * %rsi: dst (32 blocks) |
---|
.. | .. |
---|
1204 | 1199 | vzeroupper; |
---|
1205 | 1200 | |
---|
1206 | 1201 | FRAME_END |
---|
1207 | | - ret; |
---|
1208 | | -ENDPROC(camellia_ctr_32way) |
---|
| 1202 | + RET; |
---|
| 1203 | +SYM_FUNC_END(camellia_ctr_32way) |
---|
1209 | 1204 | |
---|
1210 | 1205 | #define gf128mul_x_ble(iv, mask, tmp) \ |
---|
1211 | 1206 | vpsrad $31, iv, tmp; \ |
---|
.. | .. |
---|
1227 | 1222 | vpxor tmp1, iv, iv; |
---|
1228 | 1223 | |
---|
1229 | 1224 | .align 8 |
---|
1230 | | -camellia_xts_crypt_32way: |
---|
| 1225 | +SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) |
---|
1231 | 1226 | /* input: |
---|
1232 | 1227 | * %rdi: ctx, CTX |
---|
1233 | 1228 | * %rsi: dst (32 blocks) |
---|
.. | .. |
---|
1344 | 1339 | vpxor 14 * 32(%rax), %ymm15, %ymm14; |
---|
1345 | 1340 | vpxor 15 * 32(%rax), %ymm15, %ymm15; |
---|
1346 | 1341 | |
---|
1347 | | - CALL_NOSPEC %r9; |
---|
| 1342 | + CALL_NOSPEC r9; |
---|
1348 | 1343 | |
---|
1349 | 1344 | addq $(16 * 32), %rsp; |
---|
1350 | 1345 | |
---|
.. | .. |
---|
1371 | 1366 | vzeroupper; |
---|
1372 | 1367 | |
---|
1373 | 1368 | FRAME_END |
---|
1374 | | - ret; |
---|
1375 | | -ENDPROC(camellia_xts_crypt_32way) |
---|
| 1369 | + RET; |
---|
| 1370 | +SYM_FUNC_END(camellia_xts_crypt_32way) |
---|
1376 | 1371 | |
---|
1377 | | -ENTRY(camellia_xts_enc_32way) |
---|
| 1372 | +SYM_FUNC_START(camellia_xts_enc_32way) |
---|
1378 | 1373 | /* input: |
---|
1379 | 1374 | * %rdi: ctx, CTX |
---|
1380 | 1375 | * %rsi: dst (32 blocks) |
---|
.. | .. |
---|
1387 | 1382 | leaq __camellia_enc_blk32, %r9; |
---|
1388 | 1383 | |
---|
1389 | 1384 | jmp camellia_xts_crypt_32way; |
---|
1390 | | -ENDPROC(camellia_xts_enc_32way) |
---|
| 1385 | +SYM_FUNC_END(camellia_xts_enc_32way) |
---|
1391 | 1386 | |
---|
1392 | | -ENTRY(camellia_xts_dec_32way) |
---|
| 1387 | +SYM_FUNC_START(camellia_xts_dec_32way) |
---|
1393 | 1388 | /* input: |
---|
1394 | 1389 | * %rdi: ctx, CTX |
---|
1395 | 1390 | * %rsi: dst (32 blocks) |
---|
.. | .. |
---|
1405 | 1400 | leaq __camellia_dec_blk32, %r9; |
---|
1406 | 1401 | |
---|
1407 | 1402 | jmp camellia_xts_crypt_32way; |
---|
1408 | | -ENDPROC(camellia_xts_dec_32way) |
---|
| 1403 | +SYM_FUNC_END(camellia_xts_dec_32way) |
---|