.. | .. |
---|
189 | 189 | * larger and would only be 0.5% faster (on sandy-bridge). |
---|
190 | 190 | */ |
---|
191 | 191 | .align 8 |
---|
192 | | -roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: |
---|
| 192 | +SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
---|
193 | 193 | roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, |
---|
194 | 194 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, |
---|
195 | 195 | %rcx, (%r9)); |
---|
196 | | - ret; |
---|
197 | | -ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
---|
| 196 | + RET; |
---|
| 197 | +SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) |
---|
198 | 198 | |
---|
199 | 199 | .align 8 |
---|
200 | | -roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: |
---|
| 200 | +SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
---|
201 | 201 | roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3, |
---|
202 | 202 | %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, |
---|
203 | 203 | %rax, (%r9)); |
---|
204 | | - ret; |
---|
205 | | -ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
---|
| 204 | + RET; |
---|
| 205 | +SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) |
---|
206 | 206 | |
---|
207 | 207 | /* |
---|
208 | 208 | * IN/OUT: |
---|
.. | .. |
---|
722 | 722 | .text |
---|
723 | 723 | |
---|
724 | 724 | .align 8 |
---|
725 | | -__camellia_enc_blk16: |
---|
| 725 | +SYM_FUNC_START_LOCAL(__camellia_enc_blk16) |
---|
726 | 726 | /* input: |
---|
727 | 727 | * %rdi: ctx, CTX |
---|
728 | 728 | * %rax: temporary storage, 256 bytes |
---|
.. | .. |
---|
787 | 787 | %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); |
---|
788 | 788 | |
---|
789 | 789 | FRAME_END |
---|
790 | | - ret; |
---|
| 790 | + RET; |
---|
791 | 791 | |
---|
792 | 792 | .align 8 |
---|
793 | 793 | .Lenc_max32: |
---|
.. | .. |
---|
806 | 806 | %xmm15, %rax, %rcx, 24); |
---|
807 | 807 | |
---|
808 | 808 | jmp .Lenc_done; |
---|
809 | | -ENDPROC(__camellia_enc_blk16) |
---|
| 809 | +SYM_FUNC_END(__camellia_enc_blk16) |
---|
810 | 810 | |
---|
811 | 811 | .align 8 |
---|
812 | | -__camellia_dec_blk16: |
---|
| 812 | +SYM_FUNC_START_LOCAL(__camellia_dec_blk16) |
---|
813 | 813 | /* input: |
---|
814 | 814 | * %rdi: ctx, CTX |
---|
815 | 815 | * %rax: temporary storage, 256 bytes |
---|
.. | .. |
---|
874 | 874 | %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); |
---|
875 | 875 | |
---|
876 | 876 | FRAME_END |
---|
877 | | - ret; |
---|
| 877 | + RET; |
---|
878 | 878 | |
---|
879 | 879 | .align 8 |
---|
880 | 880 | .Ldec_max32: |
---|
.. | .. |
---|
891 | 891 | ((key_table + (24) * 8) + 4)(CTX)); |
---|
892 | 892 | |
---|
893 | 893 | jmp .Ldec_max24; |
---|
894 | | -ENDPROC(__camellia_dec_blk16) |
---|
| 894 | +SYM_FUNC_END(__camellia_dec_blk16) |
---|
895 | 895 | |
---|
896 | | -ENTRY(camellia_ecb_enc_16way) |
---|
| 896 | +SYM_FUNC_START(camellia_ecb_enc_16way) |
---|
897 | 897 | /* input: |
---|
898 | 898 | * %rdi: ctx, CTX |
---|
899 | 899 | * %rsi: dst (16 blocks) |
---|
.. | .. |
---|
915 | 915 | %xmm8, %rsi); |
---|
916 | 916 | |
---|
917 | 917 | FRAME_END |
---|
918 | | - ret; |
---|
919 | | -ENDPROC(camellia_ecb_enc_16way) |
---|
| 918 | + RET; |
---|
| 919 | +SYM_FUNC_END(camellia_ecb_enc_16way) |
---|
920 | 920 | |
---|
921 | | -ENTRY(camellia_ecb_dec_16way) |
---|
| 921 | +SYM_FUNC_START(camellia_ecb_dec_16way) |
---|
922 | 922 | /* input: |
---|
923 | 923 | * %rdi: ctx, CTX |
---|
924 | 924 | * %rsi: dst (16 blocks) |
---|
.. | .. |
---|
945 | 945 | %xmm8, %rsi); |
---|
946 | 946 | |
---|
947 | 947 | FRAME_END |
---|
948 | | - ret; |
---|
949 | | -ENDPROC(camellia_ecb_dec_16way) |
---|
| 948 | + RET; |
---|
| 949 | +SYM_FUNC_END(camellia_ecb_dec_16way) |
---|
950 | 950 | |
---|
951 | | -ENTRY(camellia_cbc_dec_16way) |
---|
| 951 | +SYM_FUNC_START(camellia_cbc_dec_16way) |
---|
952 | 952 | /* input: |
---|
953 | 953 | * %rdi: ctx, CTX |
---|
954 | 954 | * %rsi: dst (16 blocks) |
---|
.. | .. |
---|
996 | 996 | %xmm8, %rsi); |
---|
997 | 997 | |
---|
998 | 998 | FRAME_END |
---|
999 | | - ret; |
---|
1000 | | -ENDPROC(camellia_cbc_dec_16way) |
---|
| 999 | + RET; |
---|
| 1000 | +SYM_FUNC_END(camellia_cbc_dec_16way) |
---|
1001 | 1001 | |
---|
1002 | 1002 | #define inc_le128(x, minus_one, tmp) \ |
---|
1003 | 1003 | vpcmpeqq minus_one, x, tmp; \ |
---|
.. | .. |
---|
1005 | 1005 | vpslldq $8, tmp, tmp; \ |
---|
1006 | 1006 | vpsubq tmp, x, x; |
---|
1007 | 1007 | |
---|
1008 | | -ENTRY(camellia_ctr_16way) |
---|
| 1008 | +SYM_FUNC_START(camellia_ctr_16way) |
---|
1009 | 1009 | /* input: |
---|
1010 | 1010 | * %rdi: ctx, CTX |
---|
1011 | 1011 | * %rsi: dst (16 blocks) |
---|
.. | .. |
---|
1109 | 1109 | %xmm8, %rsi); |
---|
1110 | 1110 | |
---|
1111 | 1111 | FRAME_END |
---|
1112 | | - ret; |
---|
1113 | | -ENDPROC(camellia_ctr_16way) |
---|
| 1112 | + RET; |
---|
| 1113 | +SYM_FUNC_END(camellia_ctr_16way) |
---|
1114 | 1114 | |
---|
1115 | 1115 | #define gf128mul_x_ble(iv, mask, tmp) \ |
---|
1116 | 1116 | vpsrad $31, iv, tmp; \ |
---|
.. | .. |
---|
1120 | 1120 | vpxor tmp, iv, iv; |
---|
1121 | 1121 | |
---|
1122 | 1122 | .align 8 |
---|
1123 | | -camellia_xts_crypt_16way: |
---|
| 1123 | +SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way) |
---|
1124 | 1124 | /* input: |
---|
1125 | 1125 | * %rdi: ctx, CTX |
---|
1126 | 1126 | * %rsi: dst (16 blocks) |
---|
.. | .. |
---|
1228 | 1228 | vpxor 14 * 16(%rax), %xmm15, %xmm14; |
---|
1229 | 1229 | vpxor 15 * 16(%rax), %xmm15, %xmm15; |
---|
1230 | 1230 | |
---|
1231 | | - CALL_NOSPEC %r9; |
---|
| 1231 | + CALL_NOSPEC r9; |
---|
1232 | 1232 | |
---|
1233 | 1233 | addq $(16 * 16), %rsp; |
---|
1234 | 1234 | |
---|
.. | .. |
---|
1253 | 1253 | %xmm8, %rsi); |
---|
1254 | 1254 | |
---|
1255 | 1255 | FRAME_END |
---|
1256 | | - ret; |
---|
1257 | | -ENDPROC(camellia_xts_crypt_16way) |
---|
| 1256 | + RET; |
---|
| 1257 | +SYM_FUNC_END(camellia_xts_crypt_16way) |
---|
1258 | 1258 | |
---|
1259 | | -ENTRY(camellia_xts_enc_16way) |
---|
| 1259 | +SYM_FUNC_START(camellia_xts_enc_16way) |
---|
1260 | 1260 | /* input: |
---|
1261 | 1261 | * %rdi: ctx, CTX |
---|
1262 | 1262 | * %rsi: dst (16 blocks) |
---|
.. | .. |
---|
1268 | 1268 | leaq __camellia_enc_blk16, %r9; |
---|
1269 | 1269 | |
---|
1270 | 1270 | jmp camellia_xts_crypt_16way; |
---|
1271 | | -ENDPROC(camellia_xts_enc_16way) |
---|
| 1271 | +SYM_FUNC_END(camellia_xts_enc_16way) |
---|
1272 | 1272 | |
---|
1273 | | -ENTRY(camellia_xts_dec_16way) |
---|
| 1273 | +SYM_FUNC_START(camellia_xts_dec_16way) |
---|
1274 | 1274 | /* input: |
---|
1275 | 1275 | * %rdi: ctx, CTX |
---|
1276 | 1276 | * %rsi: dst (16 blocks) |
---|
.. | .. |
---|
1286 | 1286 | leaq __camellia_dec_blk16, %r9; |
---|
1287 | 1287 | |
---|
1288 | 1288 | jmp camellia_xts_crypt_16way; |
---|
1289 | | -ENDPROC(camellia_xts_dec_16way) |
---|
| 1289 | +SYM_FUNC_END(camellia_xts_dec_16way) |
---|