/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

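	/*
	 * Register aliases.  The first four follow the AAPCS argument
	 * registers r0-r3, i.e. the assumed C-side prototype is along the
	 * lines of __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in,
	 * u8 *out).  ttab (ip) holds the address of the lookup table in use.
	 */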
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

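	/*
	 * Scratch registers.  t1 and t2 deliberately overlap 'in' and 'out':
	 * 'in' is dead once the input block has been loaded, and 'out' is
	 * saved on the stack and reloaded just before the final store.
	 */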
	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

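	/*
	 * Select byte \idx of \in and place it in \out: ARMv7+ uses ubfx to
	 * extract it to bit position 0, while earlier architectures leave it
	 * in place behind a mask (the shift is folded into __load instead).
	 */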
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

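	/*
	 * Load a (1 << \sz) byte wide table entry for the byte produced by
	 * __select above.  On pre-v7, that byte still sits at bit position
	 * 8 * \idx, so shift it down and scale it as part of the addressing
	 * mode.  \op may be 'b' for the byte-wide loads of the final round.
	 */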
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

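	/*
	 * Half a round: compute two columns of the output state (\out0 and
	 * \out1).  Each column is the xor of four rotated table lookups, one
	 * per input column, plus a round key word fetched with 'ldm rk!'.
	 * \enc selects the forward or the inverse byte permutation, and
	 * \oldcpsr, when given, restores the interrupt state in the final
	 * round.
	 */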
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

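	/*
	 * One full forward (fround) or inverse (iround) AES round: two
	 * __hround expansions produce all four output columns.  The final
	 * round passes \sz and \op to switch to byte-wide lookups, and
	 * \oldcpsr so the saved interrupt state can be restored.
	 */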
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

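	// Byte-swap a 32-bit word; open-coded on pre-v6, which lacks 'rev'.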
	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

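	/*
	 * Load the address of \sym, optionally with condition code \c: a
	 * literal pool load on pre-v7, a movw/movt pair otherwise.
	 */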
	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

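	/*
	 * Shared body of the encryption and decryption routines.  \round is
	 * the round macro (fround or iround), \ttab the main lookup table and
	 * \ltab an optional separate table for the final round: the inverse
	 * S-box for decryption, or empty for encryption, in which case the
	 * final round does byte-wide lookups into \ttab itself at a one byte
	 * offset.  \bsz is log2 of the entry size of the final round table.
	 */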
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

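	/*
	 * All rounds except the last are performed by the loop below.  The
	 * two entry points account for the different round counts: 10 and 14
	 * enter at 1:, 12 enters at 0:.
	 */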
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

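	/*
	 * Final round: the MixColumns step is omitted, so switch to byte-wide
	 * lookups, either into the separate \ltab or into \ttab at a one byte
	 * offset, and restore the interrupt state saved above.
	 */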
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)