| .. | .. |
|---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-only */ |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Bit sliced AES using NEON instructions |
|---|
| 3 | 4 | * |
|---|
| 4 | 5 | * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> |
|---|
| 5 | | - * |
|---|
| 6 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 7 | | - * it under the terms of the GNU General Public License version 2 as |
|---|
| 8 | | - * published by the Free Software Foundation. |
|---|
| 9 | 6 | */ |
|---|
| 10 | 7 | |
|---|
| 11 | 8 | /* |
|---|
| .. | .. |
|---|
| 383 | 380 | /* |
|---|
| 384 | 381 | * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) |
|---|
| 385 | 382 | */ |
|---|
| 386 | | -ENTRY(aesbs_convert_key) |
|---|
| 383 | +SYM_FUNC_START(aesbs_convert_key) /* args per the prototype above: x0 = out, x1 = rk, w2 = rounds */ |
|---|
| 387 | 384 | ld1 {v7.4s}, [x1], #16 // load round 0 key |
|---|
| 388 | 385 | ld1 {v17.4s}, [x1], #16 // load round 1 key |
|---|
| 389 | 386 | |
|---|
| .. | .. |
|---|
| 428 | 425 | eor v17.16b, v17.16b, v7.16b /* fold v7 into v17 ahead of the final store */ |
|---|
| 429 | 426 | str q17, [x0] /* last store before returning: 16 bytes of v17 to the address currently in x0 */ |
|---|
| 430 | 427 | ret |
|---|
| 431 | | -ENDPROC(aesbs_convert_key) |
|---|
| 428 | +SYM_FUNC_END(aesbs_convert_key) |
|---|
| 432 | 429 | |
|---|
| 433 | 430 | .align 4 |
|---|
| 434 | | -aesbs_encrypt8: |
|---|
| 431 | +SYM_FUNC_START_LOCAL(aesbs_encrypt8) /* helper reached from this file: passed by name to the __ecb_crypt and __xts_crypt macros */ |
|---|
| 435 | 432 | ldr q9, [bskey], #16 // round 0 key |
|---|
| 436 | 433 | ldr q8, M0SR /* PC-relative literal load of the 16 bytes at label M0SR (defined outside this excerpt) */ |
|---|
| 437 | 434 | ldr q24, SR /* likewise for the 16 bytes at label SR */ |
|---|
| .. | .. |
|---|
| 491 | 488 | eor v2.16b, v2.16b, v12.16b /* v12 is XORed into result registers; only the v2 and v5 steps are visible here */ |
|---|
| 492 | 489 | eor v5.16b, v5.16b, v12.16b |
|---|
| 493 | 490 | ret |
|---|
| 494 | | -ENDPROC(aesbs_encrypt8) |
|---|
| 491 | +SYM_FUNC_END(aesbs_encrypt8) |
|---|
| 495 | 492 | |
|---|
| 496 | 493 | .align 4 |
|---|
| 497 | | -aesbs_decrypt8: |
|---|
| 494 | +SYM_FUNC_START_LOCAL(aesbs_decrypt8) /* helper reached from this file: passed by name to the __ecb_crypt and __xts_crypt macros */ |
|---|
| 498 | 495 | lsl x9, rounds, #7 /* x9 = rounds * 128 */ |
|---|
| 499 | 496 | add bskey, bskey, x9 /* advance bskey by that many bytes; presumably so decryption starts at the far end of the key material - confirm against the elided body */ |
|---|
| 500 | 497 | |
|---|
| .. | .. |
|---|
| 556 | 553 | eor v3.16b, v3.16b, v12.16b /* v12 is XORed into result registers; only the v3 and v5 steps are visible here */ |
|---|
| 557 | 554 | eor v5.16b, v5.16b, v12.16b |
|---|
| 558 | 555 | ret |
|---|
| 559 | | -ENDPROC(aesbs_decrypt8) |
|---|
| 556 | +SYM_FUNC_END(aesbs_decrypt8) |
|---|
| 560 | 557 | |
|---|
| 561 | 558 | /* |
|---|
| 562 | 559 | * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
|---|
| .. | .. |
|---|
| 616 | 613 | st1 {\o7\().16b}, [x19], #16 |
|---|
| 617 | 614 | |
|---|
| 618 | 615 | cbz x23, 1f |
|---|
| 619 | | - cond_yield_neon |
|---|
| 620 | 616 | b 99b |
|---|
| 621 | 617 | |
|---|
| 622 | 618 | 1: frame_pop |
|---|
| .. | .. |
|---|
| 624 | 620 | .endm |
|---|
| 625 | 621 | |
|---|
| 626 | 622 | .align 4 |
|---|
| 627 | | -ENTRY(aesbs_ecb_encrypt) |
|---|
| 623 | +SYM_FUNC_START(aesbs_ecb_encrypt) |
|---|
| 628 | 624 | __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 /* whole body is the __ecb_crypt macro built around aesbs_encrypt8; the eight registers are presumably the order in which results are stored - only the macro's final st1 of its o7 operand (v5 here) is visible */ |
|---|
| 629 | | -ENDPROC(aesbs_ecb_encrypt) |
|---|
| 625 | +SYM_FUNC_END(aesbs_ecb_encrypt) |
|---|
| 630 | 626 | |
|---|
| 631 | 627 | .align 4 |
|---|
| 632 | | -ENTRY(aesbs_ecb_decrypt) |
|---|
| 628 | +SYM_FUNC_START(aesbs_ecb_decrypt) |
|---|
| 633 | 629 | __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 /* same macro as the encrypt entry point, built around aesbs_decrypt8 and with a different register order (v0, v1, v6, v4, v2, v7, v3, v5) */ |
|---|
| 634 | | -ENDPROC(aesbs_ecb_decrypt) |
|---|
| 630 | +SYM_FUNC_END(aesbs_ecb_decrypt) |
|---|
| 635 | 631 | |
|---|
| 636 | 632 | /* |
|---|
| 637 | 633 | * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
|---|
| 638 | 634 | * int blocks, u8 iv[]) |
|---|
| 639 | 635 | */ |
|---|
| 640 | 636 | .align 4 |
|---|
| 641 | | -ENTRY(aesbs_cbc_decrypt) |
|---|
| 637 | +SYM_FUNC_START(aesbs_cbc_decrypt) |
|---|
| 642 | 638 | frame_push 6 /* paired with frame_pop at label 2; presumably saves six callee-saved registers - confirm against the frame_push definition */ |
|---|
| 643 | 639 | |
|---|
| 644 | 640 | mov x19, x0 /* keep arg 0 (out) in callee-saved x19 */ |
|---|
| .. | .. |
|---|
| 718 | 714 | 1: st1 {v24.16b}, [x24] // store IV |
|---|
| 719 | 715 | |
|---|
| 720 | 716 | cbz x23, 2f /* x23 == 0: leave the loop */ |
|---|
| 721 | | - cond_yield_neon |
|---|
| 722 | 717 | b 99b /* otherwise branch back to label 99 */ |
|---|
| 723 | 718 | |
|---|
| 724 | 719 | 2: frame_pop |
|---|
| 725 | 720 | ret |
|---|
| 726 | | -ENDPROC(aesbs_cbc_decrypt) |
|---|
| 721 | +SYM_FUNC_END(aesbs_cbc_decrypt) |
|---|
| 727 | 722 | |
|---|
| 728 | 723 | .macro next_tweak, out, in, const, tmp |
|---|
| 729 | 724 | sshr \tmp\().2d, \in\().2d, #63 /* arithmetic shift by 63: each 64-bit lane of tmp becomes all-ones when that lane of in has its top bit set, otherwise zero */ |
|---|
| .. | .. |
|---|
| 733 | 728 | eor \out\().16b, \out\().16b, \tmp\().16b /* fold tmp into out */ |
|---|
| 734 | 729 | .endm |
|---|
| 735 | 730 | |
|---|
| 736 | | - .align 4 |
|---|
| 737 | | -.Lxts_mul_x: |
|---|
| 738 | | -CPU_LE( .quad 1, 0x87 ) |
|---|
| 739 | | -CPU_BE( .quad 0x87, 1 ) |
|---|
| 740 | | - |
|---|
| 741 | 731 | /* |
|---|
| 742 | 732 | * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
|---|
| 743 | 733 | * int blocks, u8 iv[]) |
|---|
| 744 | 734 | * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
|---|
| 745 | 735 | * int blocks, u8 iv[]) |
|---|
| 746 | 736 | */ |
|---|
| 747 | | -__xts_crypt8: |
|---|
| 737 | +SYM_FUNC_START_LOCAL(__xts_crypt8) /* called with bl from the __xts_crypt macro, which first puts the cipher routine's address in x16 */ |
|---|
| 748 | 738 | mov x6, #1 |
|---|
| 749 | 739 | lsl x6, x6, x23 /* x6 = 1 << x23; the calling macro copies x4 (blocks) into x23 on entry */ |
|---|
| 750 | 740 | subs w23, w23, #8 /* take eight off the count and set flags for the elided code that follows */ |
|---|
| .. | .. |
|---|
| 796 | 786 | |
|---|
| 797 | 787 | 0: mov bskey, x21 /* bskey and rounds are set from x21 and x22 for the cipher routine; presumably the saved rk and rounds arguments - confirm */ |
|---|
| 798 | 788 | mov rounds, x22 |
|---|
| 799 | | - br x7 |
|---|
| 800 | | -ENDPROC(__xts_crypt8) |
|---|
| 789 | + br x16 /* tail-branch to the routine whose address the caller loaded with adr x16; its ret then returns to the instruction after the caller's bl, assuming x30 is untouched in the elided part. Presumably moved from x7 to x16 so the indirect branch is compatible with BTI - confirm */ |
|---|
| 790 | +SYM_FUNC_END(__xts_crypt8) |
|---|
| 801 | 791 | |
|---|
| 802 | 792 | .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 /* do8 = 8-block cipher routine to call; o0..o7 = eight vector registers supplied by the invoking entry point */ |
|---|
| 803 | 793 | frame_push 6, 64 /* presumably six callee-saved registers plus 64 bytes of locals - confirm against the frame_push definition */ |
|---|
| .. | .. |
|---|
| 809 | 799 | mov x23, x4 /* x23 = blocks (arg 4) */ |
|---|
| 810 | 800 | mov x24, x5 /* x24 = iv pointer (arg 5) */ |
|---|
| 811 | 801 | |
|---|
| 812 | | -0: ldr q30, .Lxts_mul_x |
|---|
| 802 | + movi v30.2s, #0x1 /* v30 = { 1, 1, 0, 0 } as 32-bit lanes; the 64-bit form of movi clears the upper half */ |
|---|
| 803 | + movi v25.2s, #0x87 /* v25 = { 0x87, 0x87, 0, 0 }; v25 is only scratch here and is overwritten by the ld1 below */ |
|---|
| 804 | + uzp1 v30.4s, v30.4s, v25.4s /* even lanes of v30 then v25 give { 1, 0, 0x87, 0 }, i.e. the 64-bit pair { 1, 0x87 } that the removed literal .quad 1, 0x87 held */ |
|---|
| 813 | 805 | ld1 {v25.16b}, [x24] /* load 16 bytes from the iv pointer into v25 */ |
|---|
| 814 | 806 | |
|---|
| 815 | | -99: adr x7, \do8 |
|---|
| 807 | +99: adr x16, \do8 /* address of the cipher routine; __xts_crypt8 finishes with br x16 */ |
|---|
| 816 | 808 | bl __xts_crypt8 |
|---|
| 817 | 809 | |
|---|
| 818 | 810 | ldp q16, q17, [sp, #.Lframe_local_offset] /* reload two 16-byte values from the frame's local area */ |
|---|
| .. | .. |
|---|
| 852 | 844 | cbz x23, 1f /* x23 == 0: finish at label 1 */ |
|---|
| 853 | 845 | st1 {v25.16b}, [x24] /* otherwise write v25 back through the iv pointer before looping */ |
|---|
| 854 | 846 | |
|---|
| 855 | | - cond_yield_neon 0b |
|---|
| 856 | 847 | b 99b |
|---|
| 857 | 848 | |
|---|
| 858 | 849 | 1: st1 {v25.16b}, [x24] /* final write of v25 through the iv pointer */ |
|---|
| .. | .. |
|---|
| 860 | 851 | ret |
|---|
| 861 | 852 | .endm |
|---|
| 862 | 853 | |
|---|
| 863 | | -ENTRY(aesbs_xts_encrypt) |
|---|
| 854 | +SYM_FUNC_START(aesbs_xts_encrypt) |
|---|
| 864 | 855 | __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 /* whole body is the __xts_crypt macro with do8 = aesbs_encrypt8 and o0..o7 = v0, v1, v4, v6, v3, v7, v2, v5 */ |
|---|
| 865 | | -ENDPROC(aesbs_xts_encrypt) |
|---|
| 856 | +SYM_FUNC_END(aesbs_xts_encrypt) |
|---|
| 866 | 857 | |
|---|
| 867 | | -ENTRY(aesbs_xts_decrypt) |
|---|
| 858 | +SYM_FUNC_START(aesbs_xts_decrypt) |
|---|
| 868 | 859 | __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 /* whole body is the __xts_crypt macro with do8 = aesbs_decrypt8 and o0..o7 = v0, v1, v6, v4, v2, v7, v3, v5 */ |
|---|
| 869 | | -ENDPROC(aesbs_xts_decrypt) |
|---|
| 860 | +SYM_FUNC_END(aesbs_xts_decrypt) |
|---|
| 870 | 861 | |
|---|
| 871 | 862 | .macro next_ctr, v |
|---|
| 872 | 863 | mov \v\().d[1], x8 |
|---|
| .. | .. |
|---|
| 880 | 871 | * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], |
|---|
| 881 | 872 | * int rounds, int blocks, u8 iv[], u8 final[]) |
|---|
| 882 | 873 | */ |
|---|
| 883 | | -ENTRY(aesbs_ctr_encrypt) |
|---|
| 874 | +SYM_FUNC_START(aesbs_ctr_encrypt) |
|---|
| 884 | 875 | frame_push 8 /* presumably saves eight callee-saved registers - confirm against the frame_push definition */ |
|---|
| 885 | 876 | |
|---|
| 886 | 877 | mov x19, x0 /* keep arg 0 (out) in callee-saved x19 */ |
|---|
| .. | .. |
|---|
| 895 | 886 | cset x26, ne /* x26 = 1 if the preceding (elided) comparison was not-equal, else 0 */ |
|---|
| 896 | 887 | add x23, x23, x26 // do one extra block if final |
|---|
| 897 | 888 | |
|---|
| 898 | | -98: ldp x7, x8, [x24] |
|---|
| 889 | + ldp x7, x8, [x24] /* x7 and x8 = the 16 bytes at x24 as two 64-bit words; x24 is presumably the iv pointer - confirm. Label 98 was only referenced by the removed cond_yield_neon */ |
|---|
| 899 | 890 | ld1 {v0.16b}, [x24] /* the same 16 bytes as a vector */ |
|---|
| 900 | 891 | CPU_LE( rev x7, x7 ) /* byte-swap both words, wrapped in CPU_LE; presumably emitted only on little-endian builds because the stored counter is big-endian - confirm */ |
|---|
| 901 | 892 | CPU_LE( rev x8, x8 ) |
|---|
| .. | .. |
|---|
| 973 | 964 | st1 {v0.16b}, [x24] /* write v0 back to the address in x24 */ |
|---|
| 974 | 965 | cbz x23, .Lctr_done /* x23 == 0: no blocks left */ |
|---|
| 975 | 966 | |
|---|
| 976 | | - cond_yield_neon 98b |
|---|
| 977 | 967 | b 99b |
|---|
| 978 | 968 | |
|---|
| 979 | 969 | .Lctr_done: |
|---|
| .. | .. |
|---|
| 1008 | 998 | 7: cbz x25, 8b /* x25 == 0: skip the store below */ |
|---|
| 1009 | 999 | st1 {v5.16b}, [x25] /* otherwise store the 16 bytes of v5 through x25; presumably the final[] argument - confirm */ |
|---|
| 1010 | 1000 | b 8b |
|---|
| 1011 | | -ENDPROC(aesbs_ctr_encrypt) |
|---|
| 1001 | +SYM_FUNC_END(aesbs_ctr_encrypt) |
|---|