/* ~hc/RK356X_SDK_RELEASE.git */

/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
 
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
 
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)
 
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
 
 
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
 
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit shift-and-subtract division.
 *
 *   dividend: register holding the numerator.  It is reduced in place by
 *             every subtraction that succeeds and must be treated as
 *             clobbered on exit.
 *   divisor:  register holding the denominator.  Shifted in place
 *             (clobbered).
 *   result:   register that receives the quotient.
 *   curbit:   scratch register holding the quotient bit that matches the
 *             current alignment of the divisor.
 *
 * The condition flags are clobbered.  Both expansions in this file are only
 * reached after the caller has branched away for a zero divisor, for
 * dividend <= divisor (unsigned) and for power-of-two divisors.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit
 
#if __LINUX_ARM_ARCH__ >= 5
 
    @ Line up the leading set bit of the divisor with the leading set
    @ bit of the dividend in one step, using count-leading-zeros.
    clz    \curbit, \divisor
    clz    \result, \dividend
    sub    \result, \curbit, \result    @ result = shift distance (temporary use)
    mov    \curbit, #1
    mov    \divisor, \divisor, lsl \result
    mov    \curbit, \curbit, lsl \result    @ quotient bit for the shifted divisor
    mov    \result, #0            @ quotient accumulator starts empty
    
#else
 
    @ Initially shift the divisor left 3 bits if possible,
    @ set curbit accordingly.  This allows for curbit to be located
    @ at the left end of each 4 bit nibbles in the division loop
    @ to save one loop in most cases.
    tst    \divisor, #0xe0000000
    moveq    \divisor, \divisor, lsl #3
    moveq    \curbit, #8
    movne    \curbit, #1
 
    @ Unless the divisor is very big, shift it up in multiples of
    @ four bits, since this is the amount of unwinding in the main
    @ division loop.  Continue shifting until the divisor is 
    @ larger than the dividend.
1:    cmp    \divisor, #0x10000000
    cmplo    \divisor, \dividend
    movlo    \divisor, \divisor, lsl #4
    movlo    \curbit, \curbit, lsl #4
    blo    1b
 
    @ For very big divisors, we must shift it a bit at a time, or
    @ we will be in danger of overflowing.
1:    cmp    \divisor, #0x80000000
    cmplo    \divisor, \dividend
    movlo    \divisor, \divisor, lsl #1
    movlo    \curbit, \curbit, lsl #1
    blo    1b
 
    mov    \result, #0
 
#endif
 
    @ Division loop
    @ Each pass tries the divisor at four successive alignments
    @ (lsr 0 to 3) and ORs the matching curbit into the quotient for
    @ every subtraction that succeeds.
1:    cmp    \dividend, \divisor
    subhs    \dividend, \dividend, \divisor
    orrhs    \result,   \result,   \curbit
    cmp    \dividend, \divisor,  lsr #1
    subhs    \dividend, \dividend, \divisor, lsr #1
    orrhs    \result,   \result,   \curbit,  lsr #1
    cmp    \dividend, \divisor,  lsr #2
    subhs    \dividend, \dividend, \divisor, lsr #2
    orrhs    \result,   \result,   \curbit,  lsr #2
    cmp    \dividend, \divisor,  lsr #3
    subhs    \dividend, \dividend, \divisor, lsr #3
    orrhs    \result,   \result,   \curbit,  lsr #3
    cmp    \dividend, #0            @ Early termination?
    movsne    \curbit,   \curbit,  lsr #4    @ No, any more bits to do?
    movne    \divisor,  \divisor, lsr #4
    bne    1b
 
.endm
 
 
/*
 * ARM_DIV2_ORDER - compute the shift count for a power-of-two divisor.
 *
 *   divisor: register holding the divisor.  Left untouched by the
 *            ARMv5+ path, shifted in place (clobbered) by the fallback.
 *   order:   register that receives the bit index of the set bit, i.e.
 *            the right-shift amount that replaces the division.
 *
 * Both expansions in this file are guarded by a "tst divisor, divisor - 1"
 * test, so the divisor has exactly one bit set.  The last step of the
 * fallback path relies on that.  The fallback path clobbers the flags.
 */
.macro ARM_DIV2_ORDER divisor, order
 
#if __LINUX_ARM_ARCH__ >= 5
 
    clz    \order, \divisor
    rsb    \order, \order, #31        @ order = 31 - leading zero count
 
#else
 
    @ Binary search for the set bit: test the upper half of the
    @ remaining range, and if the bit is there shift it down and
    @ account for the distance in order.
    cmp    \divisor, #(1 << 16)
    movhs    \divisor, \divisor, lsr #16
    movhs    \order, #16
    movlo    \order, #0
 
    cmp    \divisor, #(1 << 8)
    movhs    \divisor, \divisor, lsr #8
    addhs    \order, \order, #8
 
    cmp    \divisor, #(1 << 4)
    movhs    \divisor, \divisor, lsr #4
    addhs    \order, \order, #4
 
    @ divisor is now 1, 2, 4 or 8.  Eight adds 3, anything else adds
    @ divisor >> 1 (that is 0, 1 or 2).
    cmp    \divisor, #(1 << 2)
    addhi    \order, \order, #3
    addls    \order, \order, \divisor, lsr #1
 
#endif
 
.endm
 
 
/*
 * ARM_MOD_BODY - core of the unsigned 32-bit modulo operation.
 *
 *   dividend: register holding the numerator.  Every expansion in this
 *             file hands it back to the caller as the remainder.
 *   divisor:  register holding the denominator.  Shifted in place
 *             (clobbered).
 *   order:    scratch register.  First the number of bit positions the
 *             divisor was shifted left, then a counter of the
 *             compare/subtract steps that are still outstanding.
 *   spare:    scratch register, only written by the ARMv5+ path.
 *
 * The condition flags are clobbered.  Both expansions in this file are only
 * reached when dividend > divisor (unsigned) and the divisor is neither
 * zero nor a power of two.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare
 
#if __LINUX_ARM_ARCH__ >= 5
 
    @ Line up the leading set bit of the divisor with the leading set
    @ bit of the dividend; order records the shift distance.
    clz    \order, \divisor
    clz    \spare, \dividend
    sub    \order, \order, \spare
    mov    \divisor, \divisor, lsl \order
 
#else
 
    mov    \order, #0
 
    @ Unless the divisor is very big, shift it up in multiples of
    @ four bits, since this is the amount of unwinding in the main
    @ division loop.  Continue shifting until the divisor is 
    @ larger than the dividend.
1:    cmp    \divisor, #0x10000000
    cmplo    \divisor, \dividend
    movlo    \divisor, \divisor, lsl #4
    addlo    \order, \order, #4
    blo    1b
 
    @ For very big divisors, we must shift it a bit at a time, or
    @ we will be in danger of overflowing.
1:    cmp    \divisor, #0x80000000
    cmplo    \divisor, \dividend
    movlo    \divisor, \divisor, lsl #1
    addlo    \order, \order, #1
    blo    1b
 
#endif
 
    @ Perform all needed subtractions to keep only the remainder.
    @ Do comparisons in batch of 4 first.
    @ A shift distance of order needs order + 1 compare/subtract steps.
    @ After the subtraction below order is that step count minus 4, so
    @ a negative value means fewer than four steps are needed.
    subs    \order, \order, #3        @ yes, 3 is intended here
    blt    2f
 
1:    cmp    \dividend, \divisor
    subhs    \dividend, \dividend, \divisor
    cmp    \dividend, \divisor,  lsr #1
    subhs    \dividend, \dividend, \divisor, lsr #1
    cmp    \dividend, \divisor,  lsr #2
    subhs    \dividend, \dividend, \divisor, lsr #2
    cmp    \dividend, \divisor,  lsr #3
    subhs    \dividend, \dividend, \divisor, lsr #3
    cmp    \dividend, #1            @ stop batching once dividend is 0
    mov    \divisor, \divisor, lsr #4
    subsge    \order, \order, #4
    bge    1b
 
    @ Finished if no partial batch is left (low two bits of order
    @ clear) or if the dividend has already dropped to zero.
    tst    \order, #3
    teqne    \dividend, #0
    beq    5f
 
    @ Either 1, 2 or 3 comparison/subtractions are left.
    @ order is -1 for three steps, -2 for two steps, -3 for one step.
2:    cmn    \order, #2
    blt    4f
    beq    3f
    cmp    \dividend, \divisor
    subhs    \dividend, \dividend, \divisor
    mov    \divisor,  \divisor,  lsr #1
3:    cmp    \dividend, \divisor
    subhs    \dividend, \dividend, \divisor
    mov    \divisor,  \divisor,  lsr #1
4:    cmp    \dividend, \divisor
    subhs    \dividend, \dividend, \divisor
5:
.endm
 
 
#ifdef CONFIG_ARM_PATCH_IDIV
    .align    3
#endif
 
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *
 * r1, r2 and r3 are used as scratch.  A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
 
    subs    r2, r1, #1            @ r2 = divisor - 1
    reteq    lr                @ divisor == 1: quotient is the dividend
    bcc    Ldiv0                @ borrow out: divisor == 0
    cmp    r0, r1
    bls    11f                @ dividend <= divisor: quotient is 0 or 1
    tst    r1, r2
    beq    12f                @ divisor is a power of 2: shift instead
 
    ARM_DIV_BODY r0, r1, r2, r3
 
    mov    r0, r2                @ hand the quotient back in r0
    ret    lr
 
    @ Flags still come from "cmp r0, r1": equal gives 1, lower gives 0.
11:    moveq    r0, #1
    movne    r0, #0
    ret    lr
 
    @ Power-of-two divisor: r2 becomes the shift count.
12:    ARM_DIV2_ORDER r1, r2
 
    mov    r0, r0, lsr r2
    ret    lr
 
UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
 
/*
 * __umodsi3 - unsigned 32-bit modulo.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = dividend modulo divisor
 *
 * r1, r2 and r3 are used as scratch.  A zero divisor branches to Ldiv0.
 *
 * The special cases are resolved by one chain of conditional instructions:
 *   - divisor == 1 or dividend == divisor: result is 0
 *   - dividend <  divisor: result is the dividend, r0 is left untouched
 *   - divisor is a power of 2: result is dividend & (divisor - 1)
 * Every one of them leaves the flags in the "ls" state tested by retls.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)
 
    subs    r2, r1, #1            @ compare divisor with 1
    bcc    Ldiv0
    cmpne    r0, r1                @ compare dividend with divisor
    moveq   r0, #0
    tsthi    r1, r2                @ see if divisor is power of 2
    andeq    r0, r0, r2
    retls    lr
 
    ARM_MOD_BODY r0, r1, r2, r3
 
    ret    lr
 
UNWIND(.fnend)
ENDPROC(__umodsi3)
 
#ifdef CONFIG_ARM_PATCH_IDIV
    .align 3
#endif
 
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *
 * The division is carried out on the magnitudes of the operands.  ip keeps
 * dividend ^ divisor, whose top bit is the sign of the quotient, and the
 * result is negated at the end when that bit is set.  r1, r2, r3 and ip are
 * used as scratch.  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)
 
    cmp    r1, #0
    eor    ip, r0, r1            @ save the sign of the result.
    beq    Ldiv0
    rsbmi    r1, r1, #0            @ loops below use unsigned.
    subs    r2, r1, #1            @ division by 1 or -1 ?
    beq    10f
    movs    r3, r0
    rsbmi    r3, r0, #0            @ positive dividend value
    cmp    r3, r1
    bls    11f
    tst    r1, r2                @ divisor is power of 2 ?
    beq    12f
 
    ARM_DIV_BODY r3, r1, r0, r2
 
    cmp    ip, #0
    rsbmi    r0, r0, #0
    ret    lr
 
    @ Divisor was 1 or -1 and r0 is still the original dividend.
    @ ip ^ r0 carries the sign bit of the original divisor, so r0 is
    @ negated only when dividing by -1.
10:    teq    ip, r0                @ same sign ?
    rsbmi    r0, r0, #0
    ret    lr
 
    @ Magnitude of dividend <= magnitude of divisor; flags still come
    @ from "cmp r3, r1".  Lower gives 0; equal gives +1 or -1, built
    @ from the sign bit of ip.
11:    movlo    r0, #0
    moveq    r0, ip, asr #31
    orreq    r0, r0, #1
    ret    lr
 
    @ Power-of-two divisor: shift the magnitude, then apply the sign.
12:    ARM_DIV2_ORDER r1, r2
 
    cmp    ip, #0
    mov    r0, r3, lsr r2
    rsbmi    r0, r0, #0
    ret    lr
 
UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
 
/*
 * __modsi3 - signed 32-bit modulo.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = remainder, carrying the sign of the dividend
 *
 * The remainder is computed on the magnitudes of the operands.  ip keeps
 * the original dividend and the result is negated at the end when ip is
 * negative.  r1, r2, r3 and ip are used as scratch.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)
 
    cmp    r1, #0
    beq    Ldiv0
    rsbmi    r1, r1, #0            @ loops below use unsigned.
    movs    ip, r0                @ preserve sign of dividend
    rsbmi    r0, r0, #0            @ if negative make positive
    subs    r2, r1, #1            @ compare divisor with 1
    cmpne    r0, r1                @ compare dividend with divisor
    moveq    r0, #0
    tsthi    r1, r2                @ see if divisor is power of 2
    andeq    r0, r0, r2
    bls    10f                @ a special case already produced the result
 
    ARM_MOD_BODY r0, r1, r2, r3
 
10:    cmp    ip, #0
    rsbmi    r0, r0, #0
    ret    lr
 
UNWIND(.fnend)
ENDPROC(__modsi3)
 
#ifdef CONFIG_AEABI
 
/*
 * __aeabi_uidivmod - unsigned 32-bit division with remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient (as produced by __aeabi_uidiv)
 *        r1 = dividend - quotient * divisor
 *
 * r2 and r3 are clobbered; ip and lr are saved around the call and
 * restored before returning.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}    )

    push    {r0, r1, ip, lr}        @ keep both operands across the call
    bl    __aeabi_uidiv            @ r0 = quotient
    pop    {r1, r2, ip, lr}        @ r1 = dividend, r2 = divisor
    mul    r3, r2, r0            @ r3 = divisor * quotient
    sub    r1, r1, r3            @ r1 = remainder
    ret    lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
 
/*
 * __aeabi_idivmod - signed 32-bit division with remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient (as produced by __aeabi_idiv)
 *        r1 = dividend - quotient * divisor
 *
 * r2 and r3 are clobbered; ip and lr are saved around the call and
 * restored before returning.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}    )

    push    {r0, r1, ip, lr}        @ keep both operands across the call
    bl    __aeabi_idiv            @ r0 = quotient
    pop    {r1, r2, ip, lr}        @ r1 = dividend, r2 = divisor
    mul    r3, r2, r0            @ r3 = divisor * quotient
    sub    r1, r1, r3            @ r1 = remainder
    ret    lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
 
#endif
 
/*
 * Ldiv0 - common division-by-zero tail for the routines in this file.
 *
 * Entered by a branch, so lr still holds the return address of whoever
 * called the division routine.  Calls __div0 and, if that returns, hands
 * 0 back in r0.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
    str    lr, [sp, #-8]!        @ push lr and one pad word (sp moves by 8)
    bl    __div0
    mov    r0, #0            @ About as wrong as it could be.
    ldr    pc, [sp], #8        @ return and release both words
UNWIND(.fnend)
ENDPROC(Ldiv0)