~hc/RK356X_SDK_RELEASE.git

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */
#include <asm/export.h>        
    .set noat            /* no implicit assembler use of the $at temporary */
    .set noreorder       /* keep the hand-scheduled instruction order as written */
    .text
 
/*
 * memmove: copy n bytes from src to dest; the two regions may overlap.
 *
 * Register use, as established by the code below:
 *   $16  dest                      $17  src
 *   $18  n, the number of bytes still to copy
 *   $0   copy of the original dest, never modified afterwards
 *        (presumably the return value, per memmove's C contract)
 *   $4   working dest pointer      $5   working src pointer
 *   $1, $2, $3, $6                 scratch
 *
 * Strategy:
 *   - If the regions do not overlap, branch straight to memcpy.
 *   - Otherwise copy upward (low to high addresses) when dest <= src and
 *     downward (high to low) when dest > src.
 *   - When src and dest have the same offset within a quadword, copy
 *     single bytes until the dest pointer is 8-byte aligned, then whole
 *     quadwords, then a byte tail.  When they do not, everything is
 *     copied a byte at a time.
 *
 * Single bytes are moved by a read-modify-write of the enclosing
 * quadword: ldq_u fetches the quadwords containing the src and dest
 * bytes, extbl pulls the src byte out, insbl shifts it to the dest byte
 * position, mskbl clears that byte in the dest quadword, bis merges the
 * two and stq_u writes the quadword back.
 *
 * NOTE(review): the nop/unop/fnop instructions scattered through the
 * routine look like padding that keeps each group of four instructions
 * together for EV5 issue and alignment -- confirm before removing any.
 */
    .align 4
    .globl memmove
    .ent memmove
memmove:
    /* Set up the global pointer ($29) from the procedure value in $27. */
    ldgp $29, 0($27)
    unop
    nop
    .prologue 1
 
    /* $4 = dest + n and $5 = src + n: one past the end of each region. */
    addq $16,$18,$4
    addq $17,$18,$5
    cmpule $4,$17,$1        /*  dest + n <= src  */
    cmpule $5,$16,$2        /*  dest >= src + n  */
 
    /*
     * $1 != 0 when either test above holds, i.e. the regions are
     * disjoint.  $0 receives dest and $2 receives dest ^ src before the
     * branch, so both are ready on the fall-through path.
     * NOTE(review): !samegp presumably tells the assembler that memcpy
     * shares this GP so its ldgp can be skipped -- confirm.
     */
    bis $1,$2,$1
    mov $16,$0
    xor $16,$17,$2
    bne $1,memcpy            !samegp
 
    /*
     * Overlapping regions.
     *   $2 = (dest ^ src) & 7   non-zero: src and dest are not co-aligned
     *   $1 = dest & 7           consumed by the upward path
     *   $3 = (dest <= src)      unsigned compare; selects the direction
     */
    and $2,7,$2            /* Test for src/dest co-alignment.  */
    and $16,7,$1
    cmpule $16,$17,$3
    bne $3,$memmove_up        /* dest <= src: copy upward */
 
    /*
     * Downward copy (dest > src).  $4/$5 already point one past the end.
     * $1 = (dest + n) & 7: zero means the end of dest is quadword
     * aligned and the byte head loop can be skipped.
     */
    and $4,7,$1
    bne $2,$misaligned_dn
    unop
    beq $1,$skip_aligned_byte_loop_head_dn
 
/*
 * Downward head loop: step both pointers back one byte, stop if n has
 * run out, copy that byte, and repeat until the dest pointer is 8-byte
 * aligned ($6 == 0).
 */
$aligned_byte_loop_head_dn:
    lda $4,-1($4)
    lda $5,-1($5)
    unop
    ble $18,$egress
 
    ldq_u $3,0($5)
    ldq_u $2,0($4)
    lda $18,-1($18)
    extbl $3,$5,$1
 
    insbl $1,$4,$1
    mskbl $2,$4,$2
    bis $1,$2,$1
    and $4,7,$6
 
    stq_u $1,0($4)
    bne $6,$aligned_byte_loop_head_dn
 
/*
 * Bias n by -8 so that "n >= 0" means at least one whole quadword is
 * still to be copied; skip the quadword loop if fewer than 8 remain.
 */
$skip_aligned_byte_loop_head_dn:
    lda $18,-8($18)
    blt $18,$skip_aligned_word_loop_dn
 
/*
 * Downward quadword loop: copy the 8 bytes just below each pointer,
 * then move both pointers down by 8.
 */
$aligned_word_loop_dn:
    ldq $1,-8($5)
    nop
    lda $5,-8($5)
    lda $18,-8($18)
 
    stq $1,-8($4)
    nop
    lda $4,-8($4)
    bge $18,$aligned_word_loop_dn
 
/* Undo the -8 bias; any remaining bytes go through the byte tail. */
$skip_aligned_word_loop_dn:
    lda $18,8($18)
    bgt $18,$byte_loop_tail_dn
    unop
    ret $31,($26),1
 
    .align 4
/*
 * Downward copy when src and dest are not co-aligned: the whole length
 * is handled by the byte loop below.  Nothing to do when n == 0.
 */
$misaligned_dn:
    nop
    fnop
    unop
    beq $18,$egress
 
/*
 * Downward byte loop.  The quadwords are loaded through the -1
 * displacement before the pointers are decremented; extbl/insbl/mskbl
 * and the store then use the already-decremented pointers.
 */
$byte_loop_tail_dn:
    ldq_u $3,-1($5)
    ldq_u $2,-1($4)
    lda $5,-1($5)
    lda $4,-1($4)
 
    lda $18,-1($18)
    extbl $3,$5,$1
    insbl $1,$4,$1
    mskbl $2,$4,$2
 
    bis $1,$2,$1
    stq_u $1,0($4)
    bgt $18,$byte_loop_tail_dn
    br $egress
 
/*
 * Upward copy (dest <= src).  Working pointers start at the beginning
 * of each region.  $2 (co-alignment) and $1 (dest & 7) were computed
 * before the direction branch.
 */
$memmove_up:
    mov $16,$4
    mov $17,$5
    bne $2,$misaligned_up
    beq $1,$skip_aligned_byte_loop_head_up
 
/*
 * Upward head loop: stop if n has run out, copy one byte, advance both
 * pointers, and repeat until the dest pointer is 8-byte aligned.
 */
$aligned_byte_loop_head_up:
    unop
    ble $18,$egress
    ldq_u $3,0($5)
    ldq_u $2,0($4)
 
    lda $18,-1($18)
    extbl $3,$5,$1
    insbl $1,$4,$1
    mskbl $2,$4,$2
 
    bis $1,$2,$1
    lda $5,1($5)
    stq_u $1,0($4)
    lda $4,1($4)
 
    and $4,7,$6
    bne $6,$aligned_byte_loop_head_up
 
/* Bias n by -8, as in the downward path; skip if fewer than 8 remain. */
$skip_aligned_byte_loop_head_up:
    lda $18,-8($18)
    blt $18,$skip_aligned_word_loop_up
 
/* Upward quadword loop: copy 8 bytes, then advance both pointers by 8. */
$aligned_word_loop_up:
    ldq $1,0($5)
    nop
    lda $5,8($5)
    lda $18,-8($18)
 
    stq $1,0($4)
    nop
    lda $4,8($4)
    bge $18,$aligned_word_loop_up
 
/* Undo the -8 bias; any remaining bytes go through the byte tail. */
$skip_aligned_word_loop_up:
    lda $18,8($18)
    bgt $18,$byte_loop_tail_up
    unop
    ret $31,($26),1
 
    .align 4
/*
 * Upward copy when src and dest are not co-aligned: the whole length is
 * handled by the byte loop below.  Nothing to do when n == 0.
 */
$misaligned_up:
    nop
    fnop
    unop
    beq $18,$egress
 
/*
 * Upward byte loop: copy one byte, advance both pointers, and repeat
 * while n > 0.  Falls through to $egress when done.
 */
$byte_loop_tail_up:
    ldq_u $3,0($5)
    ldq_u $2,0($4)
    lda $18,-1($18)
    extbl $3,$5,$1
 
    insbl $1,$4,$1
    mskbl $2,$4,$2
    bis $1,$2,$1
    stq_u $1,0($4)
 
    lda $5,1($5)
    lda $4,1($4)
    nop
    bgt $18,$byte_loop_tail_up
 
/*
 * Common exit: return to the address in $26 with $0 still holding the
 * original dest.
 * NOTE(review): the three trailing nops presumably pad the routine out
 * to a 16-byte boundary -- confirm.
 */
$egress:
    ret $31,($26),1
    nop
    nop
    nop
 
    .end memmove
    EXPORT_SYMBOL(memmove)