@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Bit sliced AES using NEON instructions
  *
  * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 /*
@@ -383,7 +380,7 @@
 /*
  * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
  */
-ENTRY(aesbs_convert_key)
+SYM_FUNC_START(aesbs_convert_key)
 	ld1	{v7.4s}, [x1], #16		// load round 0 key
 	ld1	{v17.4s}, [x1], #16		// load round 1 key
 
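Note: ENTRY()/ENDPROC() are the old assembler annotations; they are replaced here by the newer SYM_FUNC_START()/SYM_FUNC_END() macros from <linux/linkage.h>, which also record the symbol's ELF type and size. Helpers that are only called from within this file, such as aesbs_encrypt8 below, use SYM_FUNC_START_LOCAL() so they are not emitted as global symbols.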
@@ -428,10 +425,10 @@
 	eor	v17.16b, v17.16b, v7.16b
 	str	q17, [x0]
 	ret
-ENDPROC(aesbs_convert_key)
+SYM_FUNC_END(aesbs_convert_key)
 
 	.align	4
-aesbs_encrypt8:
+SYM_FUNC_START_LOCAL(aesbs_encrypt8)
 	ldr	q9, [bskey], #16		// round 0 key
 	ldr	q8, M0SR
 	ldr	q24, SR
@@ -491,10 +488,10 @@
 	eor	v2.16b, v2.16b, v12.16b
 	eor	v5.16b, v5.16b, v12.16b
 	ret
-ENDPROC(aesbs_encrypt8)
+SYM_FUNC_END(aesbs_encrypt8)
 
 	.align	4
-aesbs_decrypt8:
+SYM_FUNC_START_LOCAL(aesbs_decrypt8)
 	lsl	x9, rounds, #7
 	add	bskey, bskey, x9
 
@@ -556,7 +553,7 @@
 	eor	v3.16b, v3.16b, v12.16b
 	eor	v5.16b, v5.16b, v12.16b
 	ret
-ENDPROC(aesbs_decrypt8)
+SYM_FUNC_END(aesbs_decrypt8)
 
 /*
  * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
@@ -616,7 +613,6 @@
 	st1	{\o7\().16b}, [x19], #16
 
 	cbz	x23, 1f
-	cond_yield_neon
 	b	99b
 
 1:	frame_pop
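Note: this hunk and several below drop the cond_yield_neon invocations, the voluntary yield points that checked for pending preemption between iterations of the eight-block loops. The numeric labels that served only as their resume targets (the 0: in __xts_crypt and the 98: in aesbs_ctr_encrypt) disappear along with them in the later hunks.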
@@ -624,21 +620,21 @@
 	.endm
 
 	.align	4
-ENTRY(aesbs_ecb_encrypt)
+SYM_FUNC_START(aesbs_ecb_encrypt)
 	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-ENDPROC(aesbs_ecb_encrypt)
+SYM_FUNC_END(aesbs_ecb_encrypt)
 
 	.align	4
-ENTRY(aesbs_ecb_decrypt)
+SYM_FUNC_START(aesbs_ecb_decrypt)
 	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-ENDPROC(aesbs_ecb_decrypt)
+SYM_FUNC_END(aesbs_ecb_decrypt)
 
 /*
  * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *		     int blocks, u8 iv[])
  */
 	.align	4
-ENTRY(aesbs_cbc_decrypt)
+SYM_FUNC_START(aesbs_cbc_decrypt)
 	frame_push	6
 
 	mov	x19, x0
@@ -718,12 +714,11 @@
 1:	st1	{v24.16b}, [x24]		// store IV
 
 	cbz	x23, 2f
-	cond_yield_neon
 	b	99b
 
 2:	frame_pop
 	ret
-ENDPROC(aesbs_cbc_decrypt)
+SYM_FUNC_END(aesbs_cbc_decrypt)
 
 	.macro		next_tweak, out, in, const, tmp
 	sshr		\tmp\().2d,  \in\().2d,   #63
@@ -733,18 +728,13 @@
 	eor		\out\().16b, \out\().16b, \tmp\().16b
 	.endm
 
-	.align		4
-.Lxts_mul_x:
-CPU_LE(	.quad		1, 0x87		)
-CPU_BE(	.quad		0x87, 1		)
-
 /*
  * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *		     int blocks, u8 iv[])
  * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *		     int blocks, u8 iv[])
  */
-__xts_crypt8:
+SYM_FUNC_START_LOCAL(__xts_crypt8)
 	mov		x6, #1
 	lsl		x6, x6, x23
 	subs		w23, w23, #8
@@ -796,8 +786,8 @@
 
 0:	mov		bskey, x21
 	mov		rounds, x22
-	br		x7
-ENDPROC(__xts_crypt8)
+	br		x16
+SYM_FUNC_END(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
 	frame_push	6, 64
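Note: the indirect branch into \do8 now goes through x16 rather than x7. Under the Branch Target Identification (BTI) extension, a BR is only a valid way to reach a "BTI c" landing pad, such as the ones the SYM_FUNC_START* annotations can emit, when it uses x16 or x17, so this is most likely a BTI-compatibility change.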
@@ -809,10 +799,12 @@
 	mov		x23, x4
 	mov		x24, x5
 
-0:	ldr		q30, .Lxts_mul_x
+	movi		v30.2s, #0x1
+	movi		v25.2s, #0x87
+	uzp1		v30.4s, v30.4s, v25.4s
 	ld1		{v25.16b}, [x24]
 
-99:	adr		x7, \do8
+99:	adr		x16, \do8
 	bl		__xts_crypt8
 
 	ldp		q16, q17, [sp, #.Lframe_local_offset]
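Note: the deleted .Lxts_mul_x literal held the 128-bit constant { 1, 0x87 } consumed by next_tweak; the movi/movi/uzp1 sequence composes the same value in v30 without a literal-pool load. The two movi set the 32-bit lanes of v30 and v25 to 1 and 0x87 (zeroing the upper halves), and uzp1 keeps the even lanes of both sources, yielding { 1, 0, 0x87, 0 } as .4s, i.e. { 1, 0x87 } as .2d. For reference, a rough C sketch (not kernel code, function name mine) of the GF(2^128) multiply-by-x that next_tweak performs on the XTS tweak with this constant:

#include <stdint.h>

/*
 * Double the 128-bit tweak; if the top bit fell off, fold in 0x87,
 * encoding the reduction x^128 = x^7 + x^2 + x + 1. The asm gets the
 * same result lane-wise: sshr #63 replicates each lane's sign bit,
 * an and masks it with { 1, 0x87 }, and ext swaps the masked lanes
 * before the final eor.
 */
static void xts_mul_x(uint64_t t[2])	/* t[0] = low, t[1] = high */
{
	uint64_t mask = (uint64_t)((int64_t)t[1] >> 63);	/* sshr #63 */

	t[1] = (t[1] << 1) | (t[0] >> 63);
	t[0] = (t[0] << 1) ^ (mask & 0x87);
}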
@@ -852,7 +844,6 @@
 	cbz		x23, 1f
 	st1		{v25.16b}, [x24]
 
-	cond_yield_neon	0b
 	b		99b
 
 1:	st1		{v25.16b}, [x24]
@@ -860,13 +851,13 @@
 	ret
 	.endm
 
-ENTRY(aesbs_xts_encrypt)
+SYM_FUNC_START(aesbs_xts_encrypt)
 	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-ENDPROC(aesbs_xts_encrypt)
+SYM_FUNC_END(aesbs_xts_encrypt)
 
-ENTRY(aesbs_xts_decrypt)
+SYM_FUNC_START(aesbs_xts_decrypt)
 	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-ENDPROC(aesbs_xts_decrypt)
+SYM_FUNC_END(aesbs_xts_decrypt)
 
 	.macro		next_ctr, v
 	mov		\v\().d[1], x8
@@ -880,7 +871,7 @@
  * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
  *		     int rounds, int blocks, u8 iv[], u8 final[])
  */
-ENTRY(aesbs_ctr_encrypt)
+SYM_FUNC_START(aesbs_ctr_encrypt)
 	frame_push	8
 
 	mov		x19, x0
@@ -895,7 +886,7 @@
 	cset		x26, ne
 	add		x23, x23, x26		// do one extra block if final
 
-98:	ldp		x7, x8, [x24]
+	ldp		x7, x8, [x24]
 	ld1		{v0.16b}, [x24]
 CPU_LE(	rev		x7, x7		)
 CPU_LE(	rev		x8, x8		)
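Note: the 98: label was the resume target of the cond_yield_neon removed in the next hunk and is no longer needed. The ldp/rev pair itself is unchanged: CTR mode treats the IV as a 128-bit big-endian integer, so the two halves are byte-swapped into host order on little-endian kernels before being advanced. A rough C equivalent of that counter handling (names mine, not kernel code):

#include <stdint.h>
#include <string.h>

/* Load the big-endian 128-bit counter into host-order halves, as the
 * ldp + rev pair does, then advance it with carry for the next block. */
static void ctr_load_and_inc(const uint8_t iv[16], uint64_t *hi, uint64_t *lo)
{
	memcpy(hi, iv, 8);		/* ldp x7, x8, [x24]        */
	memcpy(lo, iv + 8, 8);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	*hi = __builtin_bswap64(*hi);	/* CPU_LE( rev x7, x7 )     */
	*lo = __builtin_bswap64(*lo);	/* CPU_LE( rev x8, x8 )     */
#endif
	if (++*lo == 0)			/* carry into the high half */
		++*hi;
}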
@@ -973,7 +964,6 @@
 	st1		{v0.16b}, [x24]
 	cbz		x23, .Lctr_done
 
-	cond_yield_neon	98b
 	b		99b
 
 .Lctr_done:
@@ -1008,4 +998,4 @@
 7:	cbz		x25, 8b
 	st1		{v5.16b}, [x25]
 	b		8b
-ENDPROC(aesbs_ctr_encrypt)
+SYM_FUNC_END(aesbs_ctr_encrypt)