.. | .. |
---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-only */ |
---|
1 | 2 | /* |
---|
2 | 3 | * Bit sliced AES using NEON instructions |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright (C) 2017 Linaro Ltd. |
---|
5 | 6 | * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> |
---|
6 | | - * |
---|
7 | | - * This program is free software; you can redistribute it and/or modify |
---|
8 | | - * it under the terms of the GNU General Public License version 2 as |
---|
9 | | - * published by the Free Software Foundation. |
---|
10 | 7 | */ |
---|
11 | 8 | |
---|
12 | 9 | /* |
---|
.. | .. |
---|
78 | 75 | .macro __ldr, out, sym |
---|
79 | 76 | vldr \out\()l, \sym |
---|
80 | 77 | vldr \out\()h, \sym + 8 |
---|
81 | | - .endm |
---|
82 | | - |
---|
83 | | - .macro __adr, reg, lbl |
---|
84 | | - adr \reg, \lbl |
---|
85 | | -THUMB( orr \reg, \reg, #1 ) |
---|
86 | 78 | .endm |
---|
87 | 79 | |
---|
88 | 80 | .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 |
---|
.. | .. |
---|
632 | 624 | push {r4-r6, lr} |
---|
633 | 625 | ldr r5, [sp, #16] // number of blocks |
---|
634 | 626 | |
---|
635 | | -99: __adr ip, 0f |
---|
| 627 | +99: adr ip, 0f |
---|
636 | 628 | and lr, r5, #7 |
---|
637 | 629 | cmp r5, #8 |
---|
638 | 630 | sub ip, ip, lr, lsl #2 |
---|
639 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 631 | + movlt pc, ip // computed goto if blocks < 8 |
---|
640 | 632 | |
---|
641 | 633 | vld1.8 {q0}, [r1]! |
---|
642 | 634 | vld1.8 {q1}, [r1]! |
---|
.. | .. |
---|
651 | 643 | mov rounds, r3 |
---|
652 | 644 | bl \do8 |
---|
653 | 645 | |
---|
654 | | - __adr ip, 1f |
---|
| 646 | + adr ip, 1f |
---|
655 | 647 | and lr, r5, #7 |
---|
656 | 648 | cmp r5, #8 |
---|
657 | 649 | sub ip, ip, lr, lsl #2 |
---|
658 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 650 | + movlt pc, ip // computed goto if blocks < 8 |
---|
659 | 651 | |
---|
660 | 652 | vst1.8 {\o0}, [r0]! |
---|
661 | 653 | vst1.8 {\o1}, [r0]! |
---|
.. | .. |
---|
692 | 684 | push {r4-r6, lr} |
---|
693 | 685 | ldm ip, {r5-r6} // load args 4-5 |
---|
694 | 686 | |
---|
695 | | -99: __adr ip, 0f |
---|
| 687 | +99: adr ip, 0f |
---|
696 | 688 | and lr, r5, #7 |
---|
697 | 689 | cmp r5, #8 |
---|
698 | 690 | sub ip, ip, lr, lsl #2 |
---|
699 | 691 | mov lr, r1 |
---|
700 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 692 | + movlt pc, ip // computed goto if blocks < 8 |
---|
701 | 693 | |
---|
702 | 694 | vld1.8 {q0}, [lr]! |
---|
703 | 695 | vld1.8 {q1}, [lr]! |
---|
.. | .. |
---|
721 | 713 | vmov q14, q8 |
---|
722 | 714 | vmov q15, q8 |
---|
723 | 715 | |
---|
724 | | - __adr ip, 1f |
---|
| 716 | + adr ip, 1f |
---|
725 | 717 | and lr, r5, #7 |
---|
726 | 718 | cmp r5, #8 |
---|
727 | 719 | sub ip, ip, lr, lsl #2 |
---|
728 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 720 | + movlt pc, ip // computed goto if blocks < 8 |
---|
729 | 721 | |
---|
730 | 722 | vld1.8 {q9}, [r1]! |
---|
731 | 723 | vld1.8 {q10}, [r1]! |
---|
.. | .. |
---|
736 | 728 | vld1.8 {q15}, [r1]! |
---|
737 | 729 | W(nop) |
---|
738 | 730 | |
---|
739 | | -1: __adr ip, 2f |
---|
| 731 | +1: adr ip, 2f |
---|
740 | 732 | sub ip, ip, lr, lsl #3 |
---|
741 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 733 | + movlt pc, ip // computed goto if blocks < 8 |
---|
742 | 734 | |
---|
743 | 735 | veor q0, q0, q8 |
---|
744 | 736 | vst1.8 {q0}, [r0]! |
---|
.. | .. |
---|
807 | 799 | vmov q6, q0 |
---|
808 | 800 | vmov q7, q0 |
---|
809 | 801 | |
---|
810 | | - __adr ip, 0f |
---|
| 802 | + adr ip, 0f |
---|
811 | 803 | sub lr, r5, #1 |
---|
812 | 804 | and lr, lr, #7 |
---|
813 | 805 | cmp r5, #8 |
---|
814 | 806 | sub ip, ip, lr, lsl #5 |
---|
815 | 807 | sub ip, ip, lr, lsl #2 |
---|
816 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 808 | + movlt pc, ip // computed goto if blocks < 8 |
---|
817 | 809 | |
---|
818 | 810 | next_ctr q1 |
---|
819 | 811 | next_ctr q2 |
---|
.. | .. |
---|
827 | 819 | mov rounds, r3 |
---|
828 | 820 | bl aesbs_encrypt8 |
---|
829 | 821 | |
---|
830 | | - __adr ip, 1f |
---|
| 822 | + adr ip, 1f |
---|
831 | 823 | and lr, r5, #7 |
---|
832 | 824 | cmp r5, #8 |
---|
833 | 825 | movgt r4, #0 |
---|
834 | 826 | ldrle r4, [sp, #40] // load final in the last round |
---|
835 | 827 | sub ip, ip, lr, lsl #2 |
---|
836 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 828 | + movlt pc, ip // computed goto if blocks < 8 |
---|
837 | 829 | |
---|
838 | 830 | vld1.8 {q8}, [r1]! |
---|
839 | 831 | vld1.8 {q9}, [r1]! |
---|
.. | .. |
---|
846 | 838 | 1: bne 2f |
---|
847 | 839 | vld1.8 {q15}, [r1]! |
---|
848 | 840 | |
---|
849 | | -2: __adr ip, 3f |
---|
| 841 | +2: adr ip, 3f |
---|
850 | 842 | cmp r5, #8 |
---|
851 | 843 | sub ip, ip, lr, lsl #3 |
---|
852 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 844 | + movlt pc, ip // computed goto if blocks < 8 |
---|
853 | 845 | |
---|
854 | 846 | veor q0, q0, q8 |
---|
855 | 847 | vst1.8 {q0}, [r0]! |
---|
.. | .. |
---|
890 | 882 | veor \out, \out, \tmp |
---|
891 | 883 | .endm |
---|
892 | 884 | |
---|
893 | | - .align 4 |
---|
894 | | -.Lxts_mul_x: |
---|
895 | | - .quad 1, 0x87 |
---|
896 | | - |
---|
897 | 885 | /* |
---|
898 | 886 | * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
---|
899 | | - * int blocks, u8 iv[]) |
---|
| 887 | + * int blocks, u8 iv[], int reorder_last_tweak) |
---|
900 | 888 | * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
---|
901 | | - * int blocks, u8 iv[]) |
---|
| 889 | + * int blocks, u8 iv[], int reorder_last_tweak) |
---|
902 | 890 | */ |
---|
903 | 891 | __xts_prepare8: |
---|
904 | 892 | vld1.8 {q14}, [r7] // load iv |
---|
905 | | - __ldr q15, .Lxts_mul_x // load tweak mask |
---|
| 893 | + vmov.i32 d30, #0x87 // compose tweak mask vector |
---|
| 894 | + vmovl.u32 q15, d30 |
---|
| 895 | + vshr.u64 d30, d31, #7 |
---|
906 | 896 | vmov q12, q14 |
---|
907 | 897 | |
---|
908 | | - __adr ip, 0f |
---|
| 898 | + adr ip, 0f |
---|
909 | 899 | and r4, r6, #7 |
---|
910 | 900 | cmp r6, #8 |
---|
911 | 901 | sub ip, ip, r4, lsl #5 |
---|
912 | 902 | mov r4, sp |
---|
913 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 903 | + movlt pc, ip // computed goto if blocks < 8 |
---|
914 | 904 | |
---|
915 | 905 | vld1.8 {q0}, [r1]! |
---|
916 | 906 | next_tweak q12, q14, q15, q13 |
---|
.. | .. |
---|
949 | 939 | |
---|
950 | 940 | vld1.8 {q7}, [r1]! |
---|
951 | 941 | next_tweak q14, q12, q15, q13 |
---|
952 | | - veor q7, q7, q12 |
---|
| 942 | +THUMB( itt le ) |
---|
| 943 | + W(cmple) r8, #0 |
---|
| 944 | + ble 1f |
---|
| 945 | +0: veor q7, q7, q12 |
---|
953 | 946 | vst1.8 {q12}, [r4, :128] |
---|
954 | 947 | |
---|
955 | | -0: vst1.8 {q14}, [r7] // store next iv |
---|
| 948 | + vst1.8 {q14}, [r7] // store next iv |
---|
956 | 949 | bx lr |
---|
| 950 | + |
---|
| 951 | +1: vswp q12, q14 |
---|
| 952 | + b 0b |
---|
957 | 953 | ENDPROC(__xts_prepare8) |
---|
958 | 954 | |
---|
959 | 955 | .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 |
---|
960 | 956 | push {r4-r8, lr} |
---|
961 | 957 | mov r5, sp // preserve sp |
---|
962 | 958 | ldrd r6, r7, [sp, #24] // get blocks and iv args |
---|
| 959 | + rsb r8, ip, #1 |
---|
963 | 960 | sub ip, sp, #128 // make room for 8x tweak |
---|
964 | 961 | bic ip, ip, #0xf // align sp to 16 bytes |
---|
965 | 962 | mov sp, ip |
---|
.. | .. |
---|
970 | 967 | mov rounds, r3 |
---|
971 | 968 | bl \do8 |
---|
972 | 969 | |
---|
973 | | - __adr ip, 0f |
---|
| 970 | + adr ip, 0f |
---|
974 | 971 | and lr, r6, #7 |
---|
975 | 972 | cmp r6, #8 |
---|
976 | 973 | sub ip, ip, lr, lsl #2 |
---|
977 | 974 | mov r4, sp |
---|
978 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 975 | + movlt pc, ip // computed goto if blocks < 8 |
---|
979 | 976 | |
---|
980 | 977 | vld1.8 {q8}, [r4, :128]! |
---|
981 | 978 | vld1.8 {q9}, [r4, :128]! |
---|
.. | .. |
---|
986 | 983 | vld1.8 {q14}, [r4, :128]! |
---|
987 | 984 | vld1.8 {q15}, [r4, :128] |
---|
988 | 985 | |
---|
989 | | -0: __adr ip, 1f |
---|
| 986 | +0: adr ip, 1f |
---|
990 | 987 | sub ip, ip, lr, lsl #3 |
---|
991 | | - bxlt ip // computed goto if blocks < 8 |
---|
| 988 | + movlt pc, ip // computed goto if blocks < 8 |
---|
992 | 989 | |
---|
993 | 990 | veor \o0, \o0, q8 |
---|
994 | 991 | vst1.8 {\o0}, [r0]! |
---|
.. | .. |
---|
1015 | 1012 | .endm |
---|
1016 | 1013 | |
---|
1017 | 1014 | ENTRY(aesbs_xts_encrypt) |
---|
| 1015 | + mov ip, #0 // never reorder final tweak |
---|
1018 | 1016 | __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 |
---|
1019 | 1017 | ENDPROC(aesbs_xts_encrypt) |
---|
1020 | 1018 | |
---|
1021 | 1019 | ENTRY(aesbs_xts_decrypt) |
---|
| 1020 | + ldr ip, [sp, #8] // reorder final tweak? |
---|
1022 | 1021 | __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 |
---|
1023 | 1022 | ENDPROC(aesbs_xts_decrypt) |
---|