hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/arch/x86/crypto/aesni-intel_asm.S
....@@ -1,3 +1,4 @@
1
+/* SPDX-License-Identifier: GPL-2.0-or-later */
12 /*
23 * Implement AES algorithm in Intel AES-NI instructions.
34 *
....@@ -22,15 +23,9 @@
2223 *
2324 * Ported x86_64 version to x86:
2425 * Author: Mathias Krause <minipli@googlemail.com>
25
- *
26
- * This program is free software; you can redistribute it and/or modify
27
- * it under the terms of the GNU General Public License as published by
28
- * the Free Software Foundation; either version 2 of the License, or
29
- * (at your option) any later version.
3026 */
3127
3228 #include <linux/linkage.h>
33
-#include <asm/inst.h>
3429 #include <asm/frame.h>
3530 #include <asm/nospec-branch.h>
3631
....@@ -205,7 +200,7 @@
205200 mov \SUBKEY, %r12
206201 movdqu (%r12), \TMP3
207202 movdqa SHUF_MASK(%rip), \TMP2
208
- PSHUFB_XMM \TMP2, \TMP3
203
+ pshufb \TMP2, \TMP3
209204
210205 # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
211206
....@@ -267,7 +262,7 @@
267262 movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
268263
269264 movdqa SHUF_MASK(%rip), %xmm2
270
- PSHUFB_XMM %xmm2, %xmm0
265
+ pshufb %xmm2, %xmm0
271266 movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
272267
273268 PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
....@@ -323,7 +318,7 @@
323318
324319 # Main loop - Encrypt/Decrypt remaining blocks
325320
326
- cmp $0, %r13
321
+ test %r13, %r13
327322 je _zero_cipher_left_\@
328323 sub $64, %r13
329324 je _four_cipher_left_\@
....@@ -351,7 +346,7 @@
351346 paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
352347 movdqu %xmm0, CurCount(%arg2)
353348 movdqa SHUF_MASK(%rip), %xmm10
354
- PSHUFB_XMM %xmm10, %xmm0
349
+ pshufb %xmm10, %xmm0
355350
356351 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
357352 movdqu %xmm0, PBlockEncKey(%arg2)
....@@ -381,7 +376,7 @@
381376 # get the appropriate shuffle mask
382377 movdqu (%r12), %xmm2
383378 # shift right 16-r13 bytes
384
- PSHUFB_XMM %xmm2, %xmm1
379
+ pshufb %xmm2, %xmm1
385380
386381 _data_read_\@:
387382 lea ALL_F+16(%rip), %r12
....@@ -397,12 +392,12 @@
397392 .ifc \operation, dec
398393 pand %xmm1, %xmm2
399394 movdqa SHUF_MASK(%rip), %xmm10
400
- PSHUFB_XMM %xmm10 ,%xmm2
395
+ pshufb %xmm10 ,%xmm2
401396
402397 pxor %xmm2, %xmm8
403398 .else
404399 movdqa SHUF_MASK(%rip), %xmm10
405
- PSHUFB_XMM %xmm10,%xmm0
400
+ pshufb %xmm10,%xmm0
406401
407402 pxor %xmm0, %xmm8
408403 .endif
....@@ -412,17 +407,17 @@
412407 # GHASH computation for the last <16 byte block
413408 movdqa SHUF_MASK(%rip), %xmm10
414409 # shuffle xmm0 back to output as ciphertext
415
- PSHUFB_XMM %xmm10, %xmm0
410
+ pshufb %xmm10, %xmm0
416411 .endif
417412
418413 # Output %r13 bytes
419
- MOVQ_R64_XMM %xmm0, %rax
414
+ movq %xmm0, %rax
420415 cmp $8, %r13
421416 jle _less_than_8_bytes_left_\@
422417 mov %rax, (%arg3 , %r11, 1)
423418 add $8, %r11
424419 psrldq $8, %xmm0
425
- MOVQ_R64_XMM %xmm0, %rax
420
+ movq %xmm0, %rax
426421 sub $8, %r13
427422 _less_than_8_bytes_left_\@:
428423 mov %al, (%arg3, %r11, 1)
....@@ -442,7 +437,7 @@
442437
443438 mov PBlockLen(%arg2), %r12
444439
445
- cmp $0, %r12
440
+ test %r12, %r12
446441 je _partial_done\@
447442
448443 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
....@@ -453,7 +448,7 @@
453448 movd %r12d, %xmm15 # len(A) in %xmm15
454449 mov InLen(%arg2), %r12
455450 shl $3, %r12 # len(C) in bits (*128)
456
- MOVQ_R64_XMM %r12, %xmm1
451
+ movq %r12, %xmm1
457452
458453 pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
459454 pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
....@@ -461,7 +456,7 @@
461456 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
462457 # final GHASH computation
463458 movdqa SHUF_MASK(%rip), %xmm10
464
- PSHUFB_XMM %xmm10, %xmm8
459
+ pshufb %xmm10, %xmm8
465460
466461 movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0
467462 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
....@@ -474,12 +469,12 @@
474469 cmp $8, %r11
475470 jl _T_4_\@
476471 _T_8_\@:
477
- MOVQ_R64_XMM %xmm0, %rax
472
+ movq %xmm0, %rax
478473 mov %rax, (%r10)
479474 add $8, %r10
480475 sub $8, %r11
481476 psrldq $8, %xmm0
482
- cmp $0, %r11
477
+ test %r11, %r11
483478 je _return_T_done_\@
484479 _T_4_\@:
485480 movd %xmm0, %eax
....@@ -487,7 +482,7 @@
487482 add $4, %r10
488483 sub $4, %r11
489484 psrldq $4, %xmm0
490
- cmp $0, %r11
485
+ test %r11, %r11
491486 je _return_T_done_\@
492487 _T_123_\@:
493488 movd %xmm0, %eax
....@@ -522,9 +517,9 @@
522517 pshufd $78, \HK, \TMP3
523518 pxor \GH, \TMP2 # TMP2 = a1+a0
524519 pxor \HK, \TMP3 # TMP3 = b1+b0
525
- PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1
526
- PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0
527
- PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
520
+ pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1
521
+ pclmulqdq $0x00, \HK, \GH # GH = a0*b0
522
+ pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
528523 pxor \GH, \TMP2
529524 pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0)
530525 movdqa \TMP2, \TMP3
....@@ -574,7 +569,7 @@
574569 cmp $8, \DLEN
575570 jl _read_lt8_\@
576571 mov (\DPTR), %rax
577
- MOVQ_R64_XMM %rax, \XMMDst
572
+ movq %rax, \XMMDst
578573 sub $8, \DLEN
579574 jz _done_read_partial_block_\@
580575 xor %eax, %eax
....@@ -583,7 +578,7 @@
583578 mov 7(\DPTR, \DLEN, 1), %al
584579 dec \DLEN
585580 jnz _read_next_byte_\@
586
- MOVQ_R64_XMM %rax, \XMM1
581
+ movq %rax, \XMM1
587582 pslldq $8, \XMM1
588583 por \XMM1, \XMMDst
589584 jmp _done_read_partial_block_\@
....@@ -594,7 +589,7 @@
594589 mov -1(\DPTR, \DLEN, 1), %al
595590 dec \DLEN
596591 jnz _read_next_byte_lt8_\@
597
- MOVQ_R64_XMM %rax, \XMMDst
592
+ movq %rax, \XMMDst
598593 _done_read_partial_block_\@:
599594 .endm
600595
....@@ -612,7 +607,7 @@
612607 jl _get_AAD_rest\@
613608 _get_AAD_blocks\@:
614609 movdqu (%r10), \TMP7
615
- PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data
610
+ pshufb %xmm14, \TMP7 # byte-reflect the AAD data
616611 pxor \TMP7, \TMP6
617612 GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
618613 add $16, %r10
....@@ -624,11 +619,11 @@
624619
625620 /* read the last <16B of AAD */
626621 _get_AAD_rest\@:
627
- cmp $0, %r11
622
+ test %r11, %r11
628623 je _get_AAD_done\@
629624
630625 READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
631
- PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data
626
+ pshufb %xmm14, \TMP7 # byte-reflect the AAD data
632627 pxor \TMP6, \TMP7
633628 GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
634629 movdqu \TMP7, \TMP6
....@@ -645,7 +640,7 @@
645640 .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
646641 AAD_HASH operation
647642 mov PBlockLen(%arg2), %r13
648
- cmp $0, %r13
643
+ test %r13, %r13
649644 je _partial_block_done_\@ # Leave Macro if no partial blocks
650645 # Read in input data without over reading
651646 cmp $16, \PLAIN_CYPH_LEN
....@@ -671,7 +666,7 @@
671666 # (16-r13 is the number of bytes in plaintext mod 16)
672667 add %r13, %r12
673668 movdqu (%r12), %xmm2 # get the appropriate shuffle mask
674
- PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes
669
+ pshufb %xmm2, %xmm9 # shift right r13 bytes
675670
676671 .ifc \operation, dec
677672 movdqa %xmm1, %xmm3
....@@ -693,11 +688,11 @@
693688
694689 pand %xmm1, %xmm3
695690 movdqa SHUF_MASK(%rip), %xmm10
696
- PSHUFB_XMM %xmm10, %xmm3
697
- PSHUFB_XMM %xmm2, %xmm3
691
+ pshufb %xmm10, %xmm3
692
+ pshufb %xmm2, %xmm3
698693 pxor %xmm3, \AAD_HASH
699694
700
- cmp $0, %r10
695
+ test %r10, %r10
701696 jl _partial_incomplete_1_\@
702697
703698 # GHASH computation for the last <16 Byte block
....@@ -728,11 +723,11 @@
728723 pand %xmm1, %xmm9
729724
730725 movdqa SHUF_MASK(%rip), %xmm1
731
- PSHUFB_XMM %xmm1, %xmm9
732
- PSHUFB_XMM %xmm2, %xmm9
726
+ pshufb %xmm1, %xmm9
727
+ pshufb %xmm2, %xmm9
733728 pxor %xmm9, \AAD_HASH
734729
735
- cmp $0, %r10
730
+ test %r10, %r10
736731 jl _partial_incomplete_2_\@
737732
738733 # GHASH computation for the last <16 Byte block
....@@ -748,11 +743,11 @@
748743
749744 movdqa SHUF_MASK(%rip), %xmm10
750745 # shuffle xmm9 back to output as ciphertext
751
- PSHUFB_XMM %xmm10, %xmm9
752
- PSHUFB_XMM %xmm2, %xmm9
746
+ pshufb %xmm10, %xmm9
747
+ pshufb %xmm2, %xmm9
753748 .endif
754749 # output encrypted Bytes
755
- cmp $0, %r10
750
+ test %r10, %r10
756751 jl _partial_fill_\@
757752 mov %r13, %r12
758753 mov $16, %r13
....@@ -763,14 +758,14 @@
763758 mov \PLAIN_CYPH_LEN, %r13
764759 _count_set_\@:
765760 movdqa %xmm9, %xmm0
766
- MOVQ_R64_XMM %xmm0, %rax
761
+ movq %xmm0, %rax
767762 cmp $8, %r13
768763 jle _less_than_8_bytes_left_\@
769764
770765 mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
771766 add $8, \DATA_OFFSET
772767 psrldq $8, %xmm0
773
- MOVQ_R64_XMM %xmm0, %rax
768
+ movq %xmm0, %rax
774769 sub $8, %r13
775770 _less_than_8_bytes_left_\@:
776771 movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
....@@ -814,7 +809,7 @@
814809 .else
815810 MOVADQ \XMM0, %xmm\index
816811 .endif
817
- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
812
+ pshufb %xmm14, %xmm\index # perform a 16 byte swap
818813 pxor \TMP2, %xmm\index
819814 .endr
820815 lea 0x10(%arg1),%r10
....@@ -825,7 +820,7 @@
825820 aes_loop_initial_\@:
826821 MOVADQ (%r10),\TMP1
827822 .irpc index, \i_seq
828
- AESENC \TMP1, %xmm\index
823
+ aesenc \TMP1, %xmm\index
829824 .endr
830825 add $16,%r10
831826 sub $1,%eax
....@@ -833,7 +828,7 @@
833828
834829 MOVADQ (%r10), \TMP1
835830 .irpc index, \i_seq
836
- AESENCLAST \TMP1, %xmm\index # Last Round
831
+ aesenclast \TMP1, %xmm\index # Last Round
837832 .endr
838833 .irpc index, \i_seq
839834 movdqu (%arg4 , %r11, 1), \TMP1
....@@ -845,7 +840,7 @@
845840 .ifc \operation, dec
846841 movdqa \TMP1, %xmm\index
847842 .endif
848
- PSHUFB_XMM %xmm14, %xmm\index
843
+ pshufb %xmm14, %xmm\index
849844
850845 # prepare plaintext/ciphertext for GHASH computation
851846 .endr
....@@ -880,19 +875,19 @@
880875 MOVADQ ONE(%RIP),\TMP1
881876 paddd \TMP1, \XMM0 # INCR Y0
882877 MOVADQ \XMM0, \XMM1
883
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
878
+ pshufb %xmm14, \XMM1 # perform a 16 byte swap
884879
885880 paddd \TMP1, \XMM0 # INCR Y0
886881 MOVADQ \XMM0, \XMM2
887
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
882
+ pshufb %xmm14, \XMM2 # perform a 16 byte swap
888883
889884 paddd \TMP1, \XMM0 # INCR Y0
890885 MOVADQ \XMM0, \XMM3
891
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
886
+ pshufb %xmm14, \XMM3 # perform a 16 byte swap
892887
893888 paddd \TMP1, \XMM0 # INCR Y0
894889 MOVADQ \XMM0, \XMM4
895
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
890
+ pshufb %xmm14, \XMM4 # perform a 16 byte swap
896891
897892 MOVADQ 0(%arg1),\TMP1
898893 pxor \TMP1, \XMM1
....@@ -901,17 +896,17 @@
901896 pxor \TMP1, \XMM4
902897 .irpc index, 1234 # do 4 rounds
903898 movaps 0x10*\index(%arg1), \TMP1
904
- AESENC \TMP1, \XMM1
905
- AESENC \TMP1, \XMM2
906
- AESENC \TMP1, \XMM3
907
- AESENC \TMP1, \XMM4
899
+ aesenc \TMP1, \XMM1
900
+ aesenc \TMP1, \XMM2
901
+ aesenc \TMP1, \XMM3
902
+ aesenc \TMP1, \XMM4
908903 .endr
909904 .irpc index, 56789 # do next 5 rounds
910905 movaps 0x10*\index(%arg1), \TMP1
911
- AESENC \TMP1, \XMM1
912
- AESENC \TMP1, \XMM2
913
- AESENC \TMP1, \XMM3
914
- AESENC \TMP1, \XMM4
906
+ aesenc \TMP1, \XMM1
907
+ aesenc \TMP1, \XMM2
908
+ aesenc \TMP1, \XMM3
909
+ aesenc \TMP1, \XMM4
915910 .endr
916911 lea 0xa0(%arg1),%r10
917912 mov keysize,%eax
....@@ -922,7 +917,7 @@
922917 aes_loop_pre_\@:
923918 MOVADQ (%r10),\TMP2
924919 .irpc index, 1234
925
- AESENC \TMP2, %xmm\index
920
+ aesenc \TMP2, %xmm\index
926921 .endr
927922 add $16,%r10
928923 sub $1,%eax
....@@ -930,10 +925,10 @@
930925
931926 aes_loop_pre_done\@:
932927 MOVADQ (%r10), \TMP2
933
- AESENCLAST \TMP2, \XMM1
934
- AESENCLAST \TMP2, \XMM2
935
- AESENCLAST \TMP2, \XMM3
936
- AESENCLAST \TMP2, \XMM4
928
+ aesenclast \TMP2, \XMM1
929
+ aesenclast \TMP2, \XMM2
930
+ aesenclast \TMP2, \XMM3
931
+ aesenclast \TMP2, \XMM4
937932 movdqu 16*0(%arg4 , %r11 , 1), \TMP1
938933 pxor \TMP1, \XMM1
939934 .ifc \operation, dec
....@@ -965,12 +960,12 @@
965960 .endif
966961
967962 add $64, %r11
968
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
963
+ pshufb %xmm14, \XMM1 # perform a 16 byte swap
969964 pxor \XMMDst, \XMM1
970965 # combine GHASHed value with the corresponding ciphertext
971
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
972
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
973
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
966
+ pshufb %xmm14, \XMM2 # perform a 16 byte swap
967
+ pshufb %xmm14, \XMM3 # perform a 16 byte swap
968
+ pshufb %xmm14, \XMM4 # perform a 16 byte swap
974969
975970 _initial_blocks_done\@:
976971
....@@ -998,7 +993,7 @@
998993 pxor \XMM5, \TMP6
999994 paddd ONE(%rip), \XMM0 # INCR CNT
1000995 movdqu HashKey_4(%arg2), \TMP5
1001
- PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
996
+ pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
1002997 movdqa \XMM0, \XMM1
1003998 paddd ONE(%rip), \XMM0 # INCR CNT
1004999 movdqa \XMM0, \XMM2
....@@ -1006,51 +1001,51 @@
10061001 movdqa \XMM0, \XMM3
10071002 paddd ONE(%rip), \XMM0 # INCR CNT
10081003 movdqa \XMM0, \XMM4
1009
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
1010
- PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
1011
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
1012
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
1013
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
1004
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
1005
+ pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
1006
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
1007
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
1008
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
10141009
10151010 pxor (%arg1), \XMM1
10161011 pxor (%arg1), \XMM2
10171012 pxor (%arg1), \XMM3
10181013 pxor (%arg1), \XMM4
10191014 movdqu HashKey_4_k(%arg2), \TMP5
1020
- PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
1015
+ pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
10211016 movaps 0x10(%arg1), \TMP1
1022
- AESENC \TMP1, \XMM1 # Round 1
1023
- AESENC \TMP1, \XMM2
1024
- AESENC \TMP1, \XMM3
1025
- AESENC \TMP1, \XMM4
1017
+ aesenc \TMP1, \XMM1 # Round 1
1018
+ aesenc \TMP1, \XMM2
1019
+ aesenc \TMP1, \XMM3
1020
+ aesenc \TMP1, \XMM4
10261021 movaps 0x20(%arg1), \TMP1
1027
- AESENC \TMP1, \XMM1 # Round 2
1028
- AESENC \TMP1, \XMM2
1029
- AESENC \TMP1, \XMM3
1030
- AESENC \TMP1, \XMM4
1022
+ aesenc \TMP1, \XMM1 # Round 2
1023
+ aesenc \TMP1, \XMM2
1024
+ aesenc \TMP1, \XMM3
1025
+ aesenc \TMP1, \XMM4
10311026 movdqa \XMM6, \TMP1
10321027 pshufd $78, \XMM6, \TMP2
10331028 pxor \XMM6, \TMP2
10341029 movdqu HashKey_3(%arg2), \TMP5
1035
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
1030
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
10361031 movaps 0x30(%arg1), \TMP3
1037
- AESENC \TMP3, \XMM1 # Round 3
1038
- AESENC \TMP3, \XMM2
1039
- AESENC \TMP3, \XMM3
1040
- AESENC \TMP3, \XMM4
1041
- PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
1032
+ aesenc \TMP3, \XMM1 # Round 3
1033
+ aesenc \TMP3, \XMM2
1034
+ aesenc \TMP3, \XMM3
1035
+ aesenc \TMP3, \XMM4
1036
+ pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
10421037 movaps 0x40(%arg1), \TMP3
1043
- AESENC \TMP3, \XMM1 # Round 4
1044
- AESENC \TMP3, \XMM2
1045
- AESENC \TMP3, \XMM3
1046
- AESENC \TMP3, \XMM4
1038
+ aesenc \TMP3, \XMM1 # Round 4
1039
+ aesenc \TMP3, \XMM2
1040
+ aesenc \TMP3, \XMM3
1041
+ aesenc \TMP3, \XMM4
10471042 movdqu HashKey_3_k(%arg2), \TMP5
1048
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1043
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
10491044 movaps 0x50(%arg1), \TMP3
1050
- AESENC \TMP3, \XMM1 # Round 5
1051
- AESENC \TMP3, \XMM2
1052
- AESENC \TMP3, \XMM3
1053
- AESENC \TMP3, \XMM4
1045
+ aesenc \TMP3, \XMM1 # Round 5
1046
+ aesenc \TMP3, \XMM2
1047
+ aesenc \TMP3, \XMM3
1048
+ aesenc \TMP3, \XMM4
10541049 pxor \TMP1, \TMP4
10551050 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
10561051 pxor \XMM6, \XMM5
....@@ -1062,25 +1057,25 @@
10621057
10631058 # Multiply TMP5 * HashKey using karatsuba
10641059
1065
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1060
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
10661061 movaps 0x60(%arg1), \TMP3
1067
- AESENC \TMP3, \XMM1 # Round 6
1068
- AESENC \TMP3, \XMM2
1069
- AESENC \TMP3, \XMM3
1070
- AESENC \TMP3, \XMM4
1071
- PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
1062
+ aesenc \TMP3, \XMM1 # Round 6
1063
+ aesenc \TMP3, \XMM2
1064
+ aesenc \TMP3, \XMM3
1065
+ aesenc \TMP3, \XMM4
1066
+ pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
10721067 movaps 0x70(%arg1), \TMP3
1073
- AESENC \TMP3, \XMM1 # Round 7
1074
- AESENC \TMP3, \XMM2
1075
- AESENC \TMP3, \XMM3
1076
- AESENC \TMP3, \XMM4
1068
+ aesenc \TMP3, \XMM1 # Round 7
1069
+ aesenc \TMP3, \XMM2
1070
+ aesenc \TMP3, \XMM3
1071
+ aesenc \TMP3, \XMM4
10771072 movdqu HashKey_2_k(%arg2), \TMP5
1078
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1073
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
10791074 movaps 0x80(%arg1), \TMP3
1080
- AESENC \TMP3, \XMM1 # Round 8
1081
- AESENC \TMP3, \XMM2
1082
- AESENC \TMP3, \XMM3
1083
- AESENC \TMP3, \XMM4
1075
+ aesenc \TMP3, \XMM1 # Round 8
1076
+ aesenc \TMP3, \XMM2
1077
+ aesenc \TMP3, \XMM3
1078
+ aesenc \TMP3, \XMM4
10841079 pxor \TMP1, \TMP4
10851080 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
10861081 pxor \XMM7, \XMM5
....@@ -1093,13 +1088,13 @@
10931088 pshufd $78, \XMM8, \TMP2
10941089 pxor \XMM8, \TMP2
10951090 movdqu HashKey(%arg2), \TMP5
1096
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1091
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
10971092 movaps 0x90(%arg1), \TMP3
1098
- AESENC \TMP3, \XMM1 # Round 9
1099
- AESENC \TMP3, \XMM2
1100
- AESENC \TMP3, \XMM3
1101
- AESENC \TMP3, \XMM4
1102
- PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
1093
+ aesenc \TMP3, \XMM1 # Round 9
1094
+ aesenc \TMP3, \XMM2
1095
+ aesenc \TMP3, \XMM3
1096
+ aesenc \TMP3, \XMM4
1097
+ pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
11031098 lea 0xa0(%arg1),%r10
11041099 mov keysize,%eax
11051100 shr $2,%eax # 128->4, 192->6, 256->8
....@@ -1109,7 +1104,7 @@
11091104 aes_loop_par_enc\@:
11101105 MOVADQ (%r10),\TMP3
11111106 .irpc index, 1234
1112
- AESENC \TMP3, %xmm\index
1107
+ aesenc \TMP3, %xmm\index
11131108 .endr
11141109 add $16,%r10
11151110 sub $1,%eax
....@@ -1117,12 +1112,12 @@
11171112
11181113 aes_loop_par_enc_done\@:
11191114 MOVADQ (%r10), \TMP3
1120
- AESENCLAST \TMP3, \XMM1 # Round 10
1121
- AESENCLAST \TMP3, \XMM2
1122
- AESENCLAST \TMP3, \XMM3
1123
- AESENCLAST \TMP3, \XMM4
1115
+ aesenclast \TMP3, \XMM1 # Round 10
1116
+ aesenclast \TMP3, \XMM2
1117
+ aesenclast \TMP3, \XMM3
1118
+ aesenclast \TMP3, \XMM4
11241119 movdqu HashKey_k(%arg2), \TMP5
1125
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1120
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
11261121 movdqu (%arg4,%r11,1), \TMP3
11271122 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
11281123 movdqu 16(%arg4,%r11,1), \TMP3
....@@ -1135,10 +1130,10 @@
11351130 movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer
11361131 movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer
11371132 movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer
1138
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
1139
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
1140
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
1141
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
1133
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
1134
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
1135
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
1136
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
11421137
11431138 pxor \TMP4, \TMP1
11441139 pxor \XMM8, \XMM5
....@@ -1206,7 +1201,7 @@
12061201 pxor \XMM5, \TMP6
12071202 paddd ONE(%rip), \XMM0 # INCR CNT
12081203 movdqu HashKey_4(%arg2), \TMP5
1209
- PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
1204
+ pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
12101205 movdqa \XMM0, \XMM1
12111206 paddd ONE(%rip), \XMM0 # INCR CNT
12121207 movdqa \XMM0, \XMM2
....@@ -1214,51 +1209,51 @@
12141209 movdqa \XMM0, \XMM3
12151210 paddd ONE(%rip), \XMM0 # INCR CNT
12161211 movdqa \XMM0, \XMM4
1217
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
1218
- PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
1219
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
1220
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
1221
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
1212
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
1213
+ pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
1214
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
1215
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
1216
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
12221217
12231218 pxor (%arg1), \XMM1
12241219 pxor (%arg1), \XMM2
12251220 pxor (%arg1), \XMM3
12261221 pxor (%arg1), \XMM4
12271222 movdqu HashKey_4_k(%arg2), \TMP5
1228
- PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
1223
+ pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
12291224 movaps 0x10(%arg1), \TMP1
1230
- AESENC \TMP1, \XMM1 # Round 1
1231
- AESENC \TMP1, \XMM2
1232
- AESENC \TMP1, \XMM3
1233
- AESENC \TMP1, \XMM4
1225
+ aesenc \TMP1, \XMM1 # Round 1
1226
+ aesenc \TMP1, \XMM2
1227
+ aesenc \TMP1, \XMM3
1228
+ aesenc \TMP1, \XMM4
12341229 movaps 0x20(%arg1), \TMP1
1235
- AESENC \TMP1, \XMM1 # Round 2
1236
- AESENC \TMP1, \XMM2
1237
- AESENC \TMP1, \XMM3
1238
- AESENC \TMP1, \XMM4
1230
+ aesenc \TMP1, \XMM1 # Round 2
1231
+ aesenc \TMP1, \XMM2
1232
+ aesenc \TMP1, \XMM3
1233
+ aesenc \TMP1, \XMM4
12391234 movdqa \XMM6, \TMP1
12401235 pshufd $78, \XMM6, \TMP2
12411236 pxor \XMM6, \TMP2
12421237 movdqu HashKey_3(%arg2), \TMP5
1243
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
1238
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
12441239 movaps 0x30(%arg1), \TMP3
1245
- AESENC \TMP3, \XMM1 # Round 3
1246
- AESENC \TMP3, \XMM2
1247
- AESENC \TMP3, \XMM3
1248
- AESENC \TMP3, \XMM4
1249
- PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
1240
+ aesenc \TMP3, \XMM1 # Round 3
1241
+ aesenc \TMP3, \XMM2
1242
+ aesenc \TMP3, \XMM3
1243
+ aesenc \TMP3, \XMM4
1244
+ pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
12501245 movaps 0x40(%arg1), \TMP3
1251
- AESENC \TMP3, \XMM1 # Round 4
1252
- AESENC \TMP3, \XMM2
1253
- AESENC \TMP3, \XMM3
1254
- AESENC \TMP3, \XMM4
1246
+ aesenc \TMP3, \XMM1 # Round 4
1247
+ aesenc \TMP3, \XMM2
1248
+ aesenc \TMP3, \XMM3
1249
+ aesenc \TMP3, \XMM4
12551250 movdqu HashKey_3_k(%arg2), \TMP5
1256
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1251
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
12571252 movaps 0x50(%arg1), \TMP3
1258
- AESENC \TMP3, \XMM1 # Round 5
1259
- AESENC \TMP3, \XMM2
1260
- AESENC \TMP3, \XMM3
1261
- AESENC \TMP3, \XMM4
1253
+ aesenc \TMP3, \XMM1 # Round 5
1254
+ aesenc \TMP3, \XMM2
1255
+ aesenc \TMP3, \XMM3
1256
+ aesenc \TMP3, \XMM4
12621257 pxor \TMP1, \TMP4
12631258 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
12641259 pxor \XMM6, \XMM5
....@@ -1270,25 +1265,25 @@
12701265
12711266 # Multiply TMP5 * HashKey using karatsuba
12721267
1273
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1268
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
12741269 movaps 0x60(%arg1), \TMP3
1275
- AESENC \TMP3, \XMM1 # Round 6
1276
- AESENC \TMP3, \XMM2
1277
- AESENC \TMP3, \XMM3
1278
- AESENC \TMP3, \XMM4
1279
- PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
1270
+ aesenc \TMP3, \XMM1 # Round 6
1271
+ aesenc \TMP3, \XMM2
1272
+ aesenc \TMP3, \XMM3
1273
+ aesenc \TMP3, \XMM4
1274
+ pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
12801275 movaps 0x70(%arg1), \TMP3
1281
- AESENC \TMP3, \XMM1 # Round 7
1282
- AESENC \TMP3, \XMM2
1283
- AESENC \TMP3, \XMM3
1284
- AESENC \TMP3, \XMM4
1276
+ aesenc \TMP3, \XMM1 # Round 7
1277
+ aesenc \TMP3, \XMM2
1278
+ aesenc \TMP3, \XMM3
1279
+ aesenc \TMP3, \XMM4
12851280 movdqu HashKey_2_k(%arg2), \TMP5
1286
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1281
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
12871282 movaps 0x80(%arg1), \TMP3
1288
- AESENC \TMP3, \XMM1 # Round 8
1289
- AESENC \TMP3, \XMM2
1290
- AESENC \TMP3, \XMM3
1291
- AESENC \TMP3, \XMM4
1283
+ aesenc \TMP3, \XMM1 # Round 8
1284
+ aesenc \TMP3, \XMM2
1285
+ aesenc \TMP3, \XMM3
1286
+ aesenc \TMP3, \XMM4
12921287 pxor \TMP1, \TMP4
12931288 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
12941289 pxor \XMM7, \XMM5
....@@ -1301,13 +1296,13 @@
13011296 pshufd $78, \XMM8, \TMP2
13021297 pxor \XMM8, \TMP2
13031298 movdqu HashKey(%arg2), \TMP5
1304
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1299
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
13051300 movaps 0x90(%arg1), \TMP3
1306
- AESENC \TMP3, \XMM1 # Round 9
1307
- AESENC \TMP3, \XMM2
1308
- AESENC \TMP3, \XMM3
1309
- AESENC \TMP3, \XMM4
1310
- PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
1301
+ aesenc \TMP3, \XMM1 # Round 9
1302
+ aesenc \TMP3, \XMM2
1303
+ aesenc \TMP3, \XMM3
1304
+ aesenc \TMP3, \XMM4
1305
+ pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
13111306 lea 0xa0(%arg1),%r10
13121307 mov keysize,%eax
13131308 shr $2,%eax # 128->4, 192->6, 256->8
....@@ -1317,7 +1312,7 @@
13171312 aes_loop_par_dec\@:
13181313 MOVADQ (%r10),\TMP3
13191314 .irpc index, 1234
1320
- AESENC \TMP3, %xmm\index
1315
+ aesenc \TMP3, %xmm\index
13211316 .endr
13221317 add $16,%r10
13231318 sub $1,%eax
....@@ -1325,12 +1320,12 @@
13251320
13261321 aes_loop_par_dec_done\@:
13271322 MOVADQ (%r10), \TMP3
1328
- AESENCLAST \TMP3, \XMM1 # last round
1329
- AESENCLAST \TMP3, \XMM2
1330
- AESENCLAST \TMP3, \XMM3
1331
- AESENCLAST \TMP3, \XMM4
1323
+ aesenclast \TMP3, \XMM1 # last round
1324
+ aesenclast \TMP3, \XMM2
1325
+ aesenclast \TMP3, \XMM3
1326
+ aesenclast \TMP3, \XMM4
13321327 movdqu HashKey_k(%arg2), \TMP5
1333
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1328
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
13341329 movdqu (%arg4,%r11,1), \TMP3
13351330 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
13361331 movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer
....@@ -1347,10 +1342,10 @@
13471342 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
13481343 movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer
13491344 movdqa \TMP3, \XMM4
1350
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
1351
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
1352
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
1353
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
1345
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
1346
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
1347
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
1348
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
13541349
13551350 pxor \TMP4, \TMP1
13561351 pxor \XMM8, \XMM5
....@@ -1406,10 +1401,10 @@
14061401 pshufd $78, \XMM1, \TMP2
14071402 pxor \XMM1, \TMP2
14081403 movdqu HashKey_4(%arg2), \TMP5
1409
- PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
1410
- PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
1404
+ pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1
1405
+ pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0
14111406 movdqu HashKey_4_k(%arg2), \TMP4
1412
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1407
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
14131408 movdqa \XMM1, \XMMDst
14141409 movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
14151410
....@@ -1419,10 +1414,10 @@
14191414 pshufd $78, \XMM2, \TMP2
14201415 pxor \XMM2, \TMP2
14211416 movdqu HashKey_3(%arg2), \TMP5
1422
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1423
- PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
1417
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1418
+ pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0
14241419 movdqu HashKey_3_k(%arg2), \TMP4
1425
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1420
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
14261421 pxor \TMP1, \TMP6
14271422 pxor \XMM2, \XMMDst
14281423 pxor \TMP2, \XMM1
....@@ -1434,10 +1429,10 @@
14341429 pshufd $78, \XMM3, \TMP2
14351430 pxor \XMM3, \TMP2
14361431 movdqu HashKey_2(%arg2), \TMP5
1437
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1438
- PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
1432
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1433
+ pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0
14391434 movdqu HashKey_2_k(%arg2), \TMP4
1440
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1435
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
14411436 pxor \TMP1, \TMP6
14421437 pxor \XMM3, \XMMDst
14431438 pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
....@@ -1447,10 +1442,10 @@
14471442 pshufd $78, \XMM4, \TMP2
14481443 pxor \XMM4, \TMP2
14491444 movdqu HashKey(%arg2), \TMP5
1450
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1451
- PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
1445
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
1446
+ pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0
14521447 movdqu HashKey_k(%arg2), \TMP4
1453
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1448
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
14541449 pxor \TMP1, \TMP6
14551450 pxor \XMM4, \XMMDst
14561451 pxor \XMM1, \TMP2
....@@ -1508,13 +1503,13 @@
15081503
15091504 _esb_loop_\@:
15101505 MOVADQ (%r10),\TMP1
1511
- AESENC \TMP1,\XMM0
1506
+ aesenc \TMP1,\XMM0
15121507 add $16,%r10
15131508 sub $1,%eax
15141509 jnz _esb_loop_\@
15151510
15161511 MOVADQ (%r10),\TMP1
1517
- AESENCLAST \TMP1,\XMM0
1512
+ aesenclast \TMP1,\XMM0
15181513 .endm
15191514 /*****************************************************************************
15201515 * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
....@@ -1596,15 +1591,15 @@
15961591 * poly = x^128 + x^127 + x^126 + x^121 + 1
15971592 *
15981593 *****************************************************************************/
1599
-ENTRY(aesni_gcm_dec)
1594
+SYM_FUNC_START(aesni_gcm_dec)
16001595 FUNC_SAVE
16011596
16021597 GCM_INIT %arg6, arg7, arg8, arg9
16031598 GCM_ENC_DEC dec
16041599 GCM_COMPLETE arg10, arg11
16051600 FUNC_RESTORE
1606
- ret
1607
-ENDPROC(aesni_gcm_dec)
1601
+ RET
1602
+SYM_FUNC_END(aesni_gcm_dec)
16081603
16091604
16101605 /*****************************************************************************
....@@ -1684,7 +1679,7 @@
16841679 *
16851680 * poly = x^128 + x^127 + x^126 + x^121 + 1
16861681 ***************************************************************************/
1687
-ENTRY(aesni_gcm_enc)
1682
+SYM_FUNC_START(aesni_gcm_enc)
16881683 FUNC_SAVE
16891684
16901685 GCM_INIT %arg6, arg7, arg8, arg9
....@@ -1692,8 +1687,8 @@
16921687
16931688 GCM_COMPLETE arg10, arg11
16941689 FUNC_RESTORE
1695
- ret
1696
-ENDPROC(aesni_gcm_enc)
1690
+ RET
1691
+SYM_FUNC_END(aesni_gcm_enc)
16971692
16981693 /*****************************************************************************
16991694 * void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
....@@ -1706,12 +1701,12 @@
17061701 * const u8 *aad, // Additional Authentication Data (AAD)
17071702 * u64 aad_len) // Length of AAD in bytes.
17081703 */
1709
-ENTRY(aesni_gcm_init)
1704
+SYM_FUNC_START(aesni_gcm_init)
17101705 FUNC_SAVE
17111706 GCM_INIT %arg3, %arg4,%arg5, %arg6
17121707 FUNC_RESTORE
1713
- ret
1714
-ENDPROC(aesni_gcm_init)
1708
+ RET
1709
+SYM_FUNC_END(aesni_gcm_init)
17151710
17161711 /*****************************************************************************
17171712 * void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
....@@ -1721,12 +1716,12 @@
17211716 * const u8 *in, // Plaintext input
17221717 * u64 plaintext_len, // Length of data in bytes for encryption.
17231718 */
1724
-ENTRY(aesni_gcm_enc_update)
1719
+SYM_FUNC_START(aesni_gcm_enc_update)
17251720 FUNC_SAVE
17261721 GCM_ENC_DEC enc
17271722 FUNC_RESTORE
1728
- ret
1729
-ENDPROC(aesni_gcm_enc_update)
1723
+ RET
1724
+SYM_FUNC_END(aesni_gcm_enc_update)
17301725
17311726 /*****************************************************************************
17321727 * void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
....@@ -1736,12 +1731,12 @@
17361731 * const u8 *in, // Ciphertext input
17371732 * u64 plaintext_len, // Length of data in bytes for decryption.
17381733 */
1739
-ENTRY(aesni_gcm_dec_update)
1734
+SYM_FUNC_START(aesni_gcm_dec_update)
17401735 FUNC_SAVE
17411736 GCM_ENC_DEC dec
17421737 FUNC_RESTORE
1743
- ret
1744
-ENDPROC(aesni_gcm_dec_update)
1738
+ RET
1739
+SYM_FUNC_END(aesni_gcm_dec_update)
17451740
17461741 /*****************************************************************************
17471742 * void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
....@@ -1751,19 +1746,18 @@
17511746 * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
17521747 * // 12 or 8.
17531748 */
1754
-ENTRY(aesni_gcm_finalize)
1749
+SYM_FUNC_START(aesni_gcm_finalize)
17551750 FUNC_SAVE
17561751 GCM_COMPLETE %arg3 %arg4
17571752 FUNC_RESTORE
1758
- ret
1759
-ENDPROC(aesni_gcm_finalize)
1753
+ RET
1754
+SYM_FUNC_END(aesni_gcm_finalize)
17601755
17611756 #endif
17621757
17631758
1764
-.align 4
1765
-_key_expansion_128:
1766
-_key_expansion_256a:
1759
+SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
1760
+SYM_FUNC_START_LOCAL(_key_expansion_256a)
17671761 pshufd $0b11111111, %xmm1, %xmm1
17681762 shufps $0b00010000, %xmm0, %xmm4
17691763 pxor %xmm4, %xmm0
....@@ -1772,12 +1766,11 @@
17721766 pxor %xmm1, %xmm0
17731767 movaps %xmm0, (TKEYP)
17741768 add $0x10, TKEYP
1775
- ret
1776
-ENDPROC(_key_expansion_128)
1777
-ENDPROC(_key_expansion_256a)
1769
+ RET
1770
+SYM_FUNC_END(_key_expansion_256a)
1771
+SYM_FUNC_END_ALIAS(_key_expansion_128)
17781772
1779
-.align 4
1780
-_key_expansion_192a:
1773
+SYM_FUNC_START_LOCAL(_key_expansion_192a)
17811774 pshufd $0b01010101, %xmm1, %xmm1
17821775 shufps $0b00010000, %xmm0, %xmm4
17831776 pxor %xmm4, %xmm0
....@@ -1798,11 +1791,10 @@
17981791 shufps $0b01001110, %xmm2, %xmm1
17991792 movaps %xmm1, 0x10(TKEYP)
18001793 add $0x20, TKEYP
1801
- ret
1802
-ENDPROC(_key_expansion_192a)
1794
+ RET
1795
+SYM_FUNC_END(_key_expansion_192a)
18031796
1804
-.align 4
1805
-_key_expansion_192b:
1797
+SYM_FUNC_START_LOCAL(_key_expansion_192b)
18061798 pshufd $0b01010101, %xmm1, %xmm1
18071799 shufps $0b00010000, %xmm0, %xmm4
18081800 pxor %xmm4, %xmm0
....@@ -1818,11 +1810,10 @@
18181810
18191811 movaps %xmm0, (TKEYP)
18201812 add $0x10, TKEYP
1821
- ret
1822
-ENDPROC(_key_expansion_192b)
1813
+ RET
1814
+SYM_FUNC_END(_key_expansion_192b)
18231815
1824
-.align 4
1825
-_key_expansion_256b:
1816
+SYM_FUNC_START_LOCAL(_key_expansion_256b)
18261817 pshufd $0b10101010, %xmm1, %xmm1
18271818 shufps $0b00010000, %xmm2, %xmm4
18281819 pxor %xmm4, %xmm2
....@@ -1831,14 +1822,14 @@
18311822 pxor %xmm1, %xmm2
18321823 movaps %xmm2, (TKEYP)
18331824 add $0x10, TKEYP
1834
- ret
1835
-ENDPROC(_key_expansion_256b)
1825
+ RET
1826
+SYM_FUNC_END(_key_expansion_256b)
18361827
18371828 /*
18381829 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
18391830 * unsigned int key_len)
18401831 */
1841
-ENTRY(aesni_set_key)
1832
+SYM_FUNC_START(aesni_set_key)
18421833 FRAME_BEGIN
18431834 #ifndef __x86_64__
18441835 pushl KEYP
....@@ -1857,72 +1848,72 @@
18571848 movups 0x10(UKEYP), %xmm2 # other user key
18581849 movaps %xmm2, (TKEYP)
18591850 add $0x10, TKEYP
1860
- AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
1851
+ aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
18611852 call _key_expansion_256a
1862
- AESKEYGENASSIST 0x1 %xmm0 %xmm1
1853
+ aeskeygenassist $0x1, %xmm0, %xmm1
18631854 call _key_expansion_256b
1864
- AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
1855
+ aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
18651856 call _key_expansion_256a
1866
- AESKEYGENASSIST 0x2 %xmm0 %xmm1
1857
+ aeskeygenassist $0x2, %xmm0, %xmm1
18671858 call _key_expansion_256b
1868
- AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
1859
+ aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
18691860 call _key_expansion_256a
1870
- AESKEYGENASSIST 0x4 %xmm0 %xmm1
1861
+ aeskeygenassist $0x4, %xmm0, %xmm1
18711862 call _key_expansion_256b
1872
- AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
1863
+ aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
18731864 call _key_expansion_256a
1874
- AESKEYGENASSIST 0x8 %xmm0 %xmm1
1865
+ aeskeygenassist $0x8, %xmm0, %xmm1
18751866 call _key_expansion_256b
1876
- AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
1867
+ aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
18771868 call _key_expansion_256a
1878
- AESKEYGENASSIST 0x10 %xmm0 %xmm1
1869
+ aeskeygenassist $0x10, %xmm0, %xmm1
18791870 call _key_expansion_256b
1880
- AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
1871
+ aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
18811872 call _key_expansion_256a
1882
- AESKEYGENASSIST 0x20 %xmm0 %xmm1
1873
+ aeskeygenassist $0x20, %xmm0, %xmm1
18831874 call _key_expansion_256b
1884
- AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
1875
+ aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
18851876 call _key_expansion_256a
18861877 jmp .Ldec_key
18871878 .Lenc_key192:
18881879 movq 0x10(UKEYP), %xmm2 # other user key
1889
- AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
1880
+ aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
18901881 call _key_expansion_192a
1891
- AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
1882
+ aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
18921883 call _key_expansion_192b
1893
- AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
1884
+ aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
18941885 call _key_expansion_192a
1895
- AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
1886
+ aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
18961887 call _key_expansion_192b
1897
- AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
1888
+ aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
18981889 call _key_expansion_192a
1899
- AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
1890
+ aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
19001891 call _key_expansion_192b
1901
- AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
1892
+ aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
19021893 call _key_expansion_192a
1903
- AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8
1894
+ aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
19041895 call _key_expansion_192b
19051896 jmp .Ldec_key
19061897 .Lenc_key128:
1907
- AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1
1898
+ aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
19081899 call _key_expansion_128
1909
- AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2
1900
+ aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
19101901 call _key_expansion_128
1911
- AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3
1902
+ aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
19121903 call _key_expansion_128
1913
- AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4
1904
+ aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
19141905 call _key_expansion_128
1915
- AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5
1906
+ aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
19161907 call _key_expansion_128
1917
- AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6
1908
+ aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
19181909 call _key_expansion_128
1919
- AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7
1910
+ aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
19201911 call _key_expansion_128
1921
- AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8
1912
+ aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
19221913 call _key_expansion_128
1923
- AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9
1914
+ aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
19241915 call _key_expansion_128
1925
- AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
1916
+ aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
19261917 call _key_expansion_128
19271918 .Ldec_key:
19281919 sub $0x10, TKEYP
....@@ -1935,7 +1926,7 @@
19351926 .align 4
19361927 .Ldec_key_loop:
19371928 movaps (KEYP), %xmm0
1938
- AESIMC %xmm0 %xmm1
1929
+ aesimc %xmm0, %xmm1
19391930 movaps %xmm1, (UKEYP)
19401931 add $0x10, KEYP
19411932 sub $0x10, UKEYP
....@@ -1946,13 +1937,13 @@
19461937 popl KEYP
19471938 #endif
19481939 FRAME_END
1949
- ret
1950
-ENDPROC(aesni_set_key)
1940
+ RET
1941
+SYM_FUNC_END(aesni_set_key)
19511942
19521943 /*
1953
- * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
1944
+ * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
19541945 */
1955
-ENTRY(aesni_enc)
1946
+SYM_FUNC_START(aesni_enc)
19561947 FRAME_BEGIN
19571948 #ifndef __x86_64__
19581949 pushl KEYP
....@@ -1970,8 +1961,8 @@
19701961 popl KEYP
19711962 #endif
19721963 FRAME_END
1973
- ret
1974
-ENDPROC(aesni_enc)
1964
+ RET
1965
+SYM_FUNC_END(aesni_enc)
19751966
19761967 /*
19771968 * _aesni_enc1: internal ABI
....@@ -1985,8 +1976,7 @@
19851976 * KEY
19861977 * TKEYP (T1)
19871978 */
1988
-.align 4
1989
-_aesni_enc1:
1979
+SYM_FUNC_START_LOCAL(_aesni_enc1)
19901980 movaps (KEYP), KEY # key
19911981 mov KEYP, TKEYP
19921982 pxor KEY, STATE # round 0
....@@ -1997,39 +1987,39 @@
19971987 je .Lenc192
19981988 add $0x20, TKEYP
19991989 movaps -0x60(TKEYP), KEY
2000
- AESENC KEY STATE
1990
+ aesenc KEY, STATE
20011991 movaps -0x50(TKEYP), KEY
2002
- AESENC KEY STATE
1992
+ aesenc KEY, STATE
20031993 .align 4
20041994 .Lenc192:
20051995 movaps -0x40(TKEYP), KEY
2006
- AESENC KEY STATE
1996
+ aesenc KEY, STATE
20071997 movaps -0x30(TKEYP), KEY
2008
- AESENC KEY STATE
1998
+ aesenc KEY, STATE
20091999 .align 4
20102000 .Lenc128:
20112001 movaps -0x20(TKEYP), KEY
2012
- AESENC KEY STATE
2002
+ aesenc KEY, STATE
20132003 movaps -0x10(TKEYP), KEY
2014
- AESENC KEY STATE
2004
+ aesenc KEY, STATE
20152005 movaps (TKEYP), KEY
2016
- AESENC KEY STATE
2006
+ aesenc KEY, STATE
20172007 movaps 0x10(TKEYP), KEY
2018
- AESENC KEY STATE
2008
+ aesenc KEY, STATE
20192009 movaps 0x20(TKEYP), KEY
2020
- AESENC KEY STATE
2010
+ aesenc KEY, STATE
20212011 movaps 0x30(TKEYP), KEY
2022
- AESENC KEY STATE
2012
+ aesenc KEY, STATE
20232013 movaps 0x40(TKEYP), KEY
2024
- AESENC KEY STATE
2014
+ aesenc KEY, STATE
20252015 movaps 0x50(TKEYP), KEY
2026
- AESENC KEY STATE
2016
+ aesenc KEY, STATE
20272017 movaps 0x60(TKEYP), KEY
2028
- AESENC KEY STATE
2018
+ aesenc KEY, STATE
20292019 movaps 0x70(TKEYP), KEY
2030
- AESENCLAST KEY STATE
2031
- ret
2032
-ENDPROC(_aesni_enc1)
2020
+ aesenclast KEY, STATE
2021
+ RET
2022
+SYM_FUNC_END(_aesni_enc1)
20332023
20342024 /*
20352025 * _aesni_enc4: internal ABI
....@@ -2049,8 +2039,7 @@
20492039 * KEY
20502040 * TKEYP (T1)
20512041 */
2052
-.align 4
2053
-_aesni_enc4:
2042
+SYM_FUNC_START_LOCAL(_aesni_enc4)
20542043 movaps (KEYP), KEY # key
20552044 mov KEYP, TKEYP
20562045 pxor KEY, STATE1 # round 0
....@@ -2064,86 +2053,86 @@
20642053 je .L4enc192
20652054 add $0x20, TKEYP
20662055 movaps -0x60(TKEYP), KEY
2067
- AESENC KEY STATE1
2068
- AESENC KEY STATE2
2069
- AESENC KEY STATE3
2070
- AESENC KEY STATE4
2056
+ aesenc KEY, STATE1
2057
+ aesenc KEY, STATE2
2058
+ aesenc KEY, STATE3
2059
+ aesenc KEY, STATE4
20712060 movaps -0x50(TKEYP), KEY
2072
- AESENC KEY STATE1
2073
- AESENC KEY STATE2
2074
- AESENC KEY STATE3
2075
- AESENC KEY STATE4
2061
+ aesenc KEY, STATE1
2062
+ aesenc KEY, STATE2
2063
+ aesenc KEY, STATE3
2064
+ aesenc KEY, STATE4
20762065 #.align 4
20772066 .L4enc192:
20782067 movaps -0x40(TKEYP), KEY
2079
- AESENC KEY STATE1
2080
- AESENC KEY STATE2
2081
- AESENC KEY STATE3
2082
- AESENC KEY STATE4
2068
+ aesenc KEY, STATE1
2069
+ aesenc KEY, STATE2
2070
+ aesenc KEY, STATE3
2071
+ aesenc KEY, STATE4
20832072 movaps -0x30(TKEYP), KEY
2084
- AESENC KEY STATE1
2085
- AESENC KEY STATE2
2086
- AESENC KEY STATE3
2087
- AESENC KEY STATE4
2073
+ aesenc KEY, STATE1
2074
+ aesenc KEY, STATE2
2075
+ aesenc KEY, STATE3
2076
+ aesenc KEY, STATE4
20882077 #.align 4
20892078 .L4enc128:
20902079 movaps -0x20(TKEYP), KEY
2091
- AESENC KEY STATE1
2092
- AESENC KEY STATE2
2093
- AESENC KEY STATE3
2094
- AESENC KEY STATE4
2080
+ aesenc KEY, STATE1
2081
+ aesenc KEY, STATE2
2082
+ aesenc KEY, STATE3
2083
+ aesenc KEY, STATE4
20952084 movaps -0x10(TKEYP), KEY
2096
- AESENC KEY STATE1
2097
- AESENC KEY STATE2
2098
- AESENC KEY STATE3
2099
- AESENC KEY STATE4
2085
+ aesenc KEY, STATE1
2086
+ aesenc KEY, STATE2
2087
+ aesenc KEY, STATE3
2088
+ aesenc KEY, STATE4
21002089 movaps (TKEYP), KEY
2101
- AESENC KEY STATE1
2102
- AESENC KEY STATE2
2103
- AESENC KEY STATE3
2104
- AESENC KEY STATE4
2090
+ aesenc KEY, STATE1
2091
+ aesenc KEY, STATE2
2092
+ aesenc KEY, STATE3
2093
+ aesenc KEY, STATE4
21052094 movaps 0x10(TKEYP), KEY
2106
- AESENC KEY STATE1
2107
- AESENC KEY STATE2
2108
- AESENC KEY STATE3
2109
- AESENC KEY STATE4
2095
+ aesenc KEY, STATE1
2096
+ aesenc KEY, STATE2
2097
+ aesenc KEY, STATE3
2098
+ aesenc KEY, STATE4
21102099 movaps 0x20(TKEYP), KEY
2111
- AESENC KEY STATE1
2112
- AESENC KEY STATE2
2113
- AESENC KEY STATE3
2114
- AESENC KEY STATE4
2100
+ aesenc KEY, STATE1
2101
+ aesenc KEY, STATE2
2102
+ aesenc KEY, STATE3
2103
+ aesenc KEY, STATE4
21152104 movaps 0x30(TKEYP), KEY
2116
- AESENC KEY STATE1
2117
- AESENC KEY STATE2
2118
- AESENC KEY STATE3
2119
- AESENC KEY STATE4
2105
+ aesenc KEY, STATE1
2106
+ aesenc KEY, STATE2
2107
+ aesenc KEY, STATE3
2108
+ aesenc KEY, STATE4
21202109 movaps 0x40(TKEYP), KEY
2121
- AESENC KEY STATE1
2122
- AESENC KEY STATE2
2123
- AESENC KEY STATE3
2124
- AESENC KEY STATE4
2110
+ aesenc KEY, STATE1
2111
+ aesenc KEY, STATE2
2112
+ aesenc KEY, STATE3
2113
+ aesenc KEY, STATE4
21252114 movaps 0x50(TKEYP), KEY
2126
- AESENC KEY STATE1
2127
- AESENC KEY STATE2
2128
- AESENC KEY STATE3
2129
- AESENC KEY STATE4
2115
+ aesenc KEY, STATE1
2116
+ aesenc KEY, STATE2
2117
+ aesenc KEY, STATE3
2118
+ aesenc KEY, STATE4
21302119 movaps 0x60(TKEYP), KEY
2131
- AESENC KEY STATE1
2132
- AESENC KEY STATE2
2133
- AESENC KEY STATE3
2134
- AESENC KEY STATE4
2120
+ aesenc KEY, STATE1
2121
+ aesenc KEY, STATE2
2122
+ aesenc KEY, STATE3
2123
+ aesenc KEY, STATE4
21352124 movaps 0x70(TKEYP), KEY
2136
- AESENCLAST KEY STATE1 # last round
2137
- AESENCLAST KEY STATE2
2138
- AESENCLAST KEY STATE3
2139
- AESENCLAST KEY STATE4
2140
- ret
2141
-ENDPROC(_aesni_enc4)
2125
+ aesenclast KEY, STATE1 # last round
2126
+ aesenclast KEY, STATE2
2127
+ aesenclast KEY, STATE3
2128
+ aesenclast KEY, STATE4
2129
+ RET
2130
+SYM_FUNC_END(_aesni_enc4)
21422131
21432132 /*
2144
- * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
2133
+ * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
21452134 */
2146
-ENTRY(aesni_dec)
2135
+SYM_FUNC_START(aesni_dec)
21472136 FRAME_BEGIN
21482137 #ifndef __x86_64__
21492138 pushl KEYP
....@@ -2162,8 +2151,8 @@
21622151 popl KEYP
21632152 #endif
21642153 FRAME_END
2165
- ret
2166
-ENDPROC(aesni_dec)
2154
+ RET
2155
+SYM_FUNC_END(aesni_dec)
21672156
21682157 /*
21692158 * _aesni_dec1: internal ABI
....@@ -2177,8 +2166,7 @@
21772166 * KEY
21782167 * TKEYP (T1)
21792168 */
2180
-.align 4
2181
-_aesni_dec1:
2169
+SYM_FUNC_START_LOCAL(_aesni_dec1)
21822170 movaps (KEYP), KEY # key
21832171 mov KEYP, TKEYP
21842172 pxor KEY, STATE # round 0
....@@ -2189,39 +2177,39 @@
21892177 je .Ldec192
21902178 add $0x20, TKEYP
21912179 movaps -0x60(TKEYP), KEY
2192
- AESDEC KEY STATE
2180
+ aesdec KEY, STATE
21932181 movaps -0x50(TKEYP), KEY
2194
- AESDEC KEY STATE
2182
+ aesdec KEY, STATE
21952183 .align 4
21962184 .Ldec192:
21972185 movaps -0x40(TKEYP), KEY
2198
- AESDEC KEY STATE
2186
+ aesdec KEY, STATE
21992187 movaps -0x30(TKEYP), KEY
2200
- AESDEC KEY STATE
2188
+ aesdec KEY, STATE
22012189 .align 4
22022190 .Ldec128:
22032191 movaps -0x20(TKEYP), KEY
2204
- AESDEC KEY STATE
2192
+ aesdec KEY, STATE
22052193 movaps -0x10(TKEYP), KEY
2206
- AESDEC KEY STATE
2194
+ aesdec KEY, STATE
22072195 movaps (TKEYP), KEY
2208
- AESDEC KEY STATE
2196
+ aesdec KEY, STATE
22092197 movaps 0x10(TKEYP), KEY
2210
- AESDEC KEY STATE
2198
+ aesdec KEY, STATE
22112199 movaps 0x20(TKEYP), KEY
2212
- AESDEC KEY STATE
2200
+ aesdec KEY, STATE
22132201 movaps 0x30(TKEYP), KEY
2214
- AESDEC KEY STATE
2202
+ aesdec KEY, STATE
22152203 movaps 0x40(TKEYP), KEY
2216
- AESDEC KEY STATE
2204
+ aesdec KEY, STATE
22172205 movaps 0x50(TKEYP), KEY
2218
- AESDEC KEY STATE
2206
+ aesdec KEY, STATE
22192207 movaps 0x60(TKEYP), KEY
2220
- AESDEC KEY STATE
2208
+ aesdec KEY, STATE
22212209 movaps 0x70(TKEYP), KEY
2222
- AESDECLAST KEY STATE
2223
- ret
2224
-ENDPROC(_aesni_dec1)
2210
+ aesdeclast KEY, STATE
2211
+ RET
2212
+SYM_FUNC_END(_aesni_dec1)
22252213
22262214 /*
22272215 * _aesni_dec4: internal ABI
....@@ -2241,8 +2229,7 @@
22412229 * KEY
22422230 * TKEYP (T1)
22432231 */
2244
-.align 4
2245
-_aesni_dec4:
2232
+SYM_FUNC_START_LOCAL(_aesni_dec4)
22462233 movaps (KEYP), KEY # key
22472234 mov KEYP, TKEYP
22482235 pxor KEY, STATE1 # round 0
....@@ -2256,87 +2243,87 @@
22562243 je .L4dec192
22572244 add $0x20, TKEYP
22582245 movaps -0x60(TKEYP), KEY
2259
- AESDEC KEY STATE1
2260
- AESDEC KEY STATE2
2261
- AESDEC KEY STATE3
2262
- AESDEC KEY STATE4
2246
+ aesdec KEY, STATE1
2247
+ aesdec KEY, STATE2
2248
+ aesdec KEY, STATE3
2249
+ aesdec KEY, STATE4
22632250 movaps -0x50(TKEYP), KEY
2264
- AESDEC KEY STATE1
2265
- AESDEC KEY STATE2
2266
- AESDEC KEY STATE3
2267
- AESDEC KEY STATE4
2251
+ aesdec KEY, STATE1
2252
+ aesdec KEY, STATE2
2253
+ aesdec KEY, STATE3
2254
+ aesdec KEY, STATE4
22682255 .align 4
22692256 .L4dec192:
22702257 movaps -0x40(TKEYP), KEY
2271
- AESDEC KEY STATE1
2272
- AESDEC KEY STATE2
2273
- AESDEC KEY STATE3
2274
- AESDEC KEY STATE4
2258
+ aesdec KEY, STATE1
2259
+ aesdec KEY, STATE2
2260
+ aesdec KEY, STATE3
2261
+ aesdec KEY, STATE4
22752262 movaps -0x30(TKEYP), KEY
2276
- AESDEC KEY STATE1
2277
- AESDEC KEY STATE2
2278
- AESDEC KEY STATE3
2279
- AESDEC KEY STATE4
2263
+ aesdec KEY, STATE1
2264
+ aesdec KEY, STATE2
2265
+ aesdec KEY, STATE3
2266
+ aesdec KEY, STATE4
22802267 .align 4
22812268 .L4dec128:
22822269 movaps -0x20(TKEYP), KEY
2283
- AESDEC KEY STATE1
2284
- AESDEC KEY STATE2
2285
- AESDEC KEY STATE3
2286
- AESDEC KEY STATE4
2270
+ aesdec KEY, STATE1
2271
+ aesdec KEY, STATE2
2272
+ aesdec KEY, STATE3
2273
+ aesdec KEY, STATE4
22872274 movaps -0x10(TKEYP), KEY
2288
- AESDEC KEY STATE1
2289
- AESDEC KEY STATE2
2290
- AESDEC KEY STATE3
2291
- AESDEC KEY STATE4
2275
+ aesdec KEY, STATE1
2276
+ aesdec KEY, STATE2
2277
+ aesdec KEY, STATE3
2278
+ aesdec KEY, STATE4
22922279 movaps (TKEYP), KEY
2293
- AESDEC KEY STATE1
2294
- AESDEC KEY STATE2
2295
- AESDEC KEY STATE3
2296
- AESDEC KEY STATE4
2280
+ aesdec KEY, STATE1
2281
+ aesdec KEY, STATE2
2282
+ aesdec KEY, STATE3
2283
+ aesdec KEY, STATE4
22972284 movaps 0x10(TKEYP), KEY
2298
- AESDEC KEY STATE1
2299
- AESDEC KEY STATE2
2300
- AESDEC KEY STATE3
2301
- AESDEC KEY STATE4
2285
+ aesdec KEY, STATE1
2286
+ aesdec KEY, STATE2
2287
+ aesdec KEY, STATE3
2288
+ aesdec KEY, STATE4
23022289 movaps 0x20(TKEYP), KEY
2303
- AESDEC KEY STATE1
2304
- AESDEC KEY STATE2
2305
- AESDEC KEY STATE3
2306
- AESDEC KEY STATE4
2290
+ aesdec KEY, STATE1
2291
+ aesdec KEY, STATE2
2292
+ aesdec KEY, STATE3
2293
+ aesdec KEY, STATE4
23072294 movaps 0x30(TKEYP), KEY
2308
- AESDEC KEY STATE1
2309
- AESDEC KEY STATE2
2310
- AESDEC KEY STATE3
2311
- AESDEC KEY STATE4
2295
+ aesdec KEY, STATE1
2296
+ aesdec KEY, STATE2
2297
+ aesdec KEY, STATE3
2298
+ aesdec KEY, STATE4
23122299 movaps 0x40(TKEYP), KEY
2313
- AESDEC KEY STATE1
2314
- AESDEC KEY STATE2
2315
- AESDEC KEY STATE3
2316
- AESDEC KEY STATE4
2300
+ aesdec KEY, STATE1
2301
+ aesdec KEY, STATE2
2302
+ aesdec KEY, STATE3
2303
+ aesdec KEY, STATE4
23172304 movaps 0x50(TKEYP), KEY
2318
- AESDEC KEY STATE1
2319
- AESDEC KEY STATE2
2320
- AESDEC KEY STATE3
2321
- AESDEC KEY STATE4
2305
+ aesdec KEY, STATE1
2306
+ aesdec KEY, STATE2
2307
+ aesdec KEY, STATE3
2308
+ aesdec KEY, STATE4
23222309 movaps 0x60(TKEYP), KEY
2323
- AESDEC KEY STATE1
2324
- AESDEC KEY STATE2
2325
- AESDEC KEY STATE3
2326
- AESDEC KEY STATE4
2310
+ aesdec KEY, STATE1
2311
+ aesdec KEY, STATE2
2312
+ aesdec KEY, STATE3
2313
+ aesdec KEY, STATE4
23272314 movaps 0x70(TKEYP), KEY
2328
- AESDECLAST KEY STATE1 # last round
2329
- AESDECLAST KEY STATE2
2330
- AESDECLAST KEY STATE3
2331
- AESDECLAST KEY STATE4
2332
- ret
2333
-ENDPROC(_aesni_dec4)
2315
+ aesdeclast KEY, STATE1 # last round
2316
+ aesdeclast KEY, STATE2
2317
+ aesdeclast KEY, STATE3
2318
+ aesdeclast KEY, STATE4
2319
+ RET
2320
+SYM_FUNC_END(_aesni_dec4)
23342321
23352322 /*
23362323 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
23372324 * size_t len)
23382325 */
2339
-ENTRY(aesni_ecb_enc)
2326
+SYM_FUNC_START(aesni_ecb_enc)
23402327 FRAME_BEGIN
23412328 #ifndef __x86_64__
23422329 pushl LEN
....@@ -2389,14 +2376,14 @@
23892376 popl LEN
23902377 #endif
23912378 FRAME_END
2392
- ret
2393
-ENDPROC(aesni_ecb_enc)
2379
+ RET
2380
+SYM_FUNC_END(aesni_ecb_enc)
23942381
23952382 /*
23962383 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
23972384 * size_t len);
23982385 */
2399
-ENTRY(aesni_ecb_dec)
2386
+SYM_FUNC_START(aesni_ecb_dec)
24002387 FRAME_BEGIN
24012388 #ifndef __x86_64__
24022389 pushl LEN
....@@ -2450,14 +2437,14 @@
24502437 popl LEN
24512438 #endif
24522439 FRAME_END
2453
- ret
2454
-ENDPROC(aesni_ecb_dec)
2440
+ RET
2441
+SYM_FUNC_END(aesni_ecb_dec)
24552442
24562443 /*
24572444 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
24582445 * size_t len, u8 *iv)
24592446 */
2460
-ENTRY(aesni_cbc_enc)
2447
+SYM_FUNC_START(aesni_cbc_enc)
24612448 FRAME_BEGIN
24622449 #ifndef __x86_64__
24632450 pushl IVP
....@@ -2494,14 +2481,14 @@
24942481 popl IVP
24952482 #endif
24962483 FRAME_END
2497
- ret
2498
-ENDPROC(aesni_cbc_enc)
2484
+ RET
2485
+SYM_FUNC_END(aesni_cbc_enc)
24992486
25002487 /*
25012488 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
25022489 * size_t len, u8 *iv)
25032490 */
2504
-ENTRY(aesni_cbc_dec)
2491
+SYM_FUNC_START(aesni_cbc_dec)
25052492 FRAME_BEGIN
25062493 #ifndef __x86_64__
25072494 pushl IVP
....@@ -2587,8 +2574,8 @@
25872574 popl IVP
25882575 #endif
25892576 FRAME_END
2590
- ret
2591
-ENDPROC(aesni_cbc_dec)
2577
+ RET
2578
+SYM_FUNC_END(aesni_cbc_dec)
25922579
25932580 #ifdef __x86_64__
25942581 .pushsection .rodata
....@@ -2608,16 +2595,15 @@
26082595 * INC: == 1, in little endian
26092596 * BSWAP_MASK == endian swapping mask
26102597 */
2611
-.align 4
2612
-_aesni_inc_init:
2598
+SYM_FUNC_START_LOCAL(_aesni_inc_init)
26132599 movaps .Lbswap_mask, BSWAP_MASK
26142600 movaps IV, CTR
2615
- PSHUFB_XMM BSWAP_MASK CTR
2601
+ pshufb BSWAP_MASK, CTR
26162602 mov $1, TCTR_LOW
2617
- MOVQ_R64_XMM TCTR_LOW INC
2618
- MOVQ_R64_XMM CTR TCTR_LOW
2619
- ret
2620
-ENDPROC(_aesni_inc_init)
2603
+ movq TCTR_LOW, INC
2604
+ movq CTR, TCTR_LOW
2605
+ RET
2606
+SYM_FUNC_END(_aesni_inc_init)
26212607
26222608 /*
26232609 * _aesni_inc: internal ABI
....@@ -2634,8 +2620,7 @@
26342620 * CTR: == output IV, in little endian
26352621 * TCTR_LOW: == lower qword of CTR
26362622 */
2637
-.align 4
2638
-_aesni_inc:
2623
+SYM_FUNC_START_LOCAL(_aesni_inc)
26392624 paddq INC, CTR
26402625 add $1, TCTR_LOW
26412626 jnc .Linc_low
....@@ -2644,15 +2629,15 @@
26442629 psrldq $8, INC
26452630 .Linc_low:
26462631 movaps CTR, IV
2647
- PSHUFB_XMM BSWAP_MASK IV
2648
- ret
2649
-ENDPROC(_aesni_inc)
2632
+ pshufb BSWAP_MASK, IV
2633
+ RET
2634
+SYM_FUNC_END(_aesni_inc)
26502635
26512636 /*
26522637 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
26532638 * size_t len, u8 *iv)
26542639 */
2655
-ENTRY(aesni_ctr_enc)
2640
+SYM_FUNC_START(aesni_ctr_enc)
26562641 FRAME_BEGIN
26572642 cmp $16, LEN
26582643 jb .Lctr_enc_just_ret
....@@ -2708,8 +2693,8 @@
27082693 movups IV, (IVP)
27092694 .Lctr_enc_just_ret:
27102695 FRAME_END
2711
- ret
2712
-ENDPROC(aesni_ctr_enc)
2696
+ RET
2697
+SYM_FUNC_END(aesni_ctr_enc)
27132698
27142699 /*
27152700 * _aesni_gf128mul_x_ble: internal ABI
....@@ -2730,25 +2715,18 @@
27302715 pxor CTR, IV;
27312716
27322717 /*
2733
- * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
2734
- * bool enc, u8 *iv)
2718
+ * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
2719
+ * const u8 *src, unsigned int len, le128 *iv)
27352720 */
2736
-ENTRY(aesni_xts_crypt8)
2721
+SYM_FUNC_START(aesni_xts_encrypt)
27372722 FRAME_BEGIN
2738
- cmpb $0, %cl
2739
- movl $0, %ecx
2740
- movl $240, %r10d
2741
- leaq _aesni_enc4, %r11
2742
- leaq _aesni_dec4, %rax
2743
- cmovel %r10d, %ecx
2744
- cmoveq %rax, %r11
27452723
27462724 movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
27472725 movups (IVP), IV
27482726
27492727 mov 480(KEYP), KLEN
2750
- addq %rcx, KEYP
27512728
2729
+.Lxts_enc_loop4:
27522730 movdqa IV, STATE1
27532731 movdqu 0x00(INP), INC
27542732 pxor INC, STATE1
....@@ -2772,71 +2750,103 @@
27722750 pxor INC, STATE4
27732751 movdqu IV, 0x30(OUTP)
27742752
2775
- CALL_NOSPEC %r11
2753
+ call _aesni_enc4
27762754
27772755 movdqu 0x00(OUTP), INC
27782756 pxor INC, STATE1
27792757 movdqu STATE1, 0x00(OUTP)
27802758
2781
- _aesni_gf128mul_x_ble()
2782
- movdqa IV, STATE1
2783
- movdqu 0x40(INP), INC
2784
- pxor INC, STATE1
2785
- movdqu IV, 0x40(OUTP)
2786
-
27872759 movdqu 0x10(OUTP), INC
27882760 pxor INC, STATE2
27892761 movdqu STATE2, 0x10(OUTP)
27902762
2791
- _aesni_gf128mul_x_ble()
2792
- movdqa IV, STATE2
2793
- movdqu 0x50(INP), INC
2794
- pxor INC, STATE2
2795
- movdqu IV, 0x50(OUTP)
2796
-
27972763 movdqu 0x20(OUTP), INC
27982764 pxor INC, STATE3
27992765 movdqu STATE3, 0x20(OUTP)
2800
-
2801
- _aesni_gf128mul_x_ble()
2802
- movdqa IV, STATE3
2803
- movdqu 0x60(INP), INC
2804
- pxor INC, STATE3
2805
- movdqu IV, 0x60(OUTP)
28062766
28072767 movdqu 0x30(OUTP), INC
28082768 pxor INC, STATE4
28092769 movdqu STATE4, 0x30(OUTP)
28102770
28112771 _aesni_gf128mul_x_ble()
2812
- movdqa IV, STATE4
2813
- movdqu 0x70(INP), INC
2814
- pxor INC, STATE4
2815
- movdqu IV, 0x70(OUTP)
28162772
2817
- _aesni_gf128mul_x_ble()
2773
+ add $64, INP
2774
+ add $64, OUTP
2775
+ sub $64, LEN
2776
+ ja .Lxts_enc_loop4
2777
+
28182778 movups IV, (IVP)
28192779
2820
- CALL_NOSPEC %r11
2780
+ FRAME_END
2781
+ RET
2782
+SYM_FUNC_END(aesni_xts_encrypt)
28212783
2822
- movdqu 0x40(OUTP), INC
2784
+/*
2785
+ * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
2786
+ * const u8 *src, unsigned int len, le128 *iv)
2787
+ */
2788
+SYM_FUNC_START(aesni_xts_decrypt)
2789
+ FRAME_BEGIN
2790
+
2791
+ movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
2792
+ movups (IVP), IV
2793
+
2794
+ mov 480(KEYP), KLEN
2795
+ add $240, KEYP
2796
+
2797
+.Lxts_dec_loop4:
2798
+ movdqa IV, STATE1
2799
+ movdqu 0x00(INP), INC
28232800 pxor INC, STATE1
2824
- movdqu STATE1, 0x40(OUTP)
2801
+ movdqu IV, 0x00(OUTP)
28252802
2826
- movdqu 0x50(OUTP), INC
2803
+ _aesni_gf128mul_x_ble()
2804
+ movdqa IV, STATE2
2805
+ movdqu 0x10(INP), INC
28272806 pxor INC, STATE2
2828
- movdqu STATE2, 0x50(OUTP)
2807
+ movdqu IV, 0x10(OUTP)
28292808
2830
- movdqu 0x60(OUTP), INC
2809
+ _aesni_gf128mul_x_ble()
2810
+ movdqa IV, STATE3
2811
+ movdqu 0x20(INP), INC
28312812 pxor INC, STATE3
2832
- movdqu STATE3, 0x60(OUTP)
2813
+ movdqu IV, 0x20(OUTP)
28332814
2834
- movdqu 0x70(OUTP), INC
2815
+ _aesni_gf128mul_x_ble()
2816
+ movdqa IV, STATE4
2817
+ movdqu 0x30(INP), INC
28352818 pxor INC, STATE4
2836
- movdqu STATE4, 0x70(OUTP)
2819
+ movdqu IV, 0x30(OUTP)
2820
+
2821
+ call _aesni_dec4
2822
+
2823
+ movdqu 0x00(OUTP), INC
2824
+ pxor INC, STATE1
2825
+ movdqu STATE1, 0x00(OUTP)
2826
+
2827
+ movdqu 0x10(OUTP), INC
2828
+ pxor INC, STATE2
2829
+ movdqu STATE2, 0x10(OUTP)
2830
+
2831
+ movdqu 0x20(OUTP), INC
2832
+ pxor INC, STATE3
2833
+ movdqu STATE3, 0x20(OUTP)
2834
+
2835
+ movdqu 0x30(OUTP), INC
2836
+ pxor INC, STATE4
2837
+ movdqu STATE4, 0x30(OUTP)
2838
+
2839
+ _aesni_gf128mul_x_ble()
2840
+
2841
+ add $64, INP
2842
+ add $64, OUTP
2843
+ sub $64, LEN
2844
+ ja .Lxts_dec_loop4
2845
+
2846
+ movups IV, (IVP)
28372847
28382848 FRAME_END
2839
- ret
2840
-ENDPROC(aesni_xts_crypt8)
2849
+ RET
2850
+SYM_FUNC_END(aesni_xts_decrypt)
28412851
28422852 #endif