1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
| /* Copyright (C) 2006 Free Software Foundation, Inc.
|
| * SPDX-License-Identifier: GPL-2.0+
| */
|
| /* Moderately Space-optimized libgcc routines for the Renesas SH /
| STMicroelectronics ST40 CPUs.
| Contributed by J"orn Rennecke joern.rennecke@st.com. */
|
| /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
| sh4-200 run times:
| udiv small divisor: 55 cycles
| udiv large divisor: 52 cycles
| sdiv small divisor, positive result: 59 cycles
| sdiv large divisor, positive result: 56 cycles
| sdiv small divisor, negative result: 65 cycles (*)
| sdiv large divisor, negative result: 62 cycles (*)
| (*): r2 is restored in the rts delay slot and has a lingering latency
| of two more cycles. */
| .balign 4 /* align the entry point to a 4-byte boundary */
| .global __udivsi3_i4i
| .global __udivsi3_i4
| .set __udivsi3_i4, __udivsi3_i4i /* __udivsi3_i4 is an alias of __udivsi3_i4i */
| .type __udivsi3_i4i, @function
| .type __sdivsi3_i4i, @function
| __udivsi3_i4i: /* Unsigned 32-bit division of r4 (dividend) by r5 (divisor); the quotient is assembled in r0.
| r4 and r5 are pushed on the stack and popped again before returning.
| r1 is overwritten with the return address, because the bsr calls below clobber pr.
| Two paths: divisor fits in 16 bits (small) or does not (large_divisor).
| There is no test for a zero divisor in this routine. */
| sts pr,r1 /* r1 = return address; the final jmp @r1 returns through it */
| mov.l r4,@-r15 /* push r4 so it can be restored on exit */
| extu.w r5,r0 /* r0 = low 16 bits of the divisor, zero-extended */
| cmp/eq r5,r0 /* T = 1 iff the divisor fits in 16 bits */
| swap.w r4,r0 /* r0 = dividend with its two 16-bit halves swapped */
| shlr16 r4 /* r4 = upper 16 bits of the dividend */
| bf/s large_divisor /* T = 0: divisor needs more than 16 bits */
| div0u /* (delay slot) clear M, Q and T for unsigned div1 stepping */
| mov.l r5,@-r15 /* push r5 so it can be restored on exit */
| shll16 r5 /* move the 16-bit divisor into the upper half of r5 */
| sdiv_small_divisor: /* also reached by a bra from __sdivsi3_i4i */
| div1 r5,r4 /* first round: 16 div1 steps in total (2 + 6, then 2 + 6) */
| bsr div6 /* six steps in the shared helper */
| div1 r5,r4 /* (delay slot) executes before the helper body */
| div1 r5,r4
| bsr div6
| div1 r5,r4 /* (delay slot) */
| xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
| xtrct r0,r4 /* r4 = (r0 << 16) | (r4 >> 16) */
| bsr div7 /* second round: again 16 div1 steps (7, then 2 + 7) */
| swap.w r4,r4 /* (delay slot) swap the halves of r4 before the steps run */
| div1 r5,r4
| bsr div7
| div1 r5,r4 /* (delay slot) */
| xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16): combine the bits from both rounds */
| mov.l @r15+,r5 /* restore r5 */
| swap.w r0,r0 /* swap the halves of r0 */
| mov.l @r15+,r4 /* restore r4 */
| jmp @r1 /* return via the address saved in r1 */
| rotcl r0 /* (delay slot) rotate the T bit left by the last div1 into bit 0 of r0 */
| div7: /* helper: seven div1 steps; falls through into div6 */
| div1 r5,r4
| div6: /* helper: six div1 steps */
| div1 r5,r4; div1 r5,r4; div1 r5,r4
| div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 /* the last div1 runs in the rts delay slot */
|
| divx3: /* helper for the large-divisor path: three rotcl and three div1;
| together with the rotcl before the bsr and the div1 in its delay slot
| each call site performs four rotcl/div1 pairs */
| rotcl r0 /* T goes into bit 0 of r0, bit 31 of r0 goes into T */
| div1 r5,r4
| rotcl r0
| div1 r5,r4
| rotcl r0
| rts
| div1 r5,r4 /* (delay slot) */
|
| large_divisor:
| mov.l r5,@-r15 /* push r5 so it can be restored on exit */
| sdiv_large_divisor: /* also reached by a bf/s from __sdivsi3_i4i */
| xor r4,r0 /* r0 ^= r4: the low half of r0 held the same bits as r4, so it becomes 0;
| the low 16 dividend bits stay in the upper half of r0 */
| .rept 4 /* 4 repetitions x 4 rotcl/div1 pairs = 16 division steps */
| rotcl r0
| bsr divx3
| div1 r5,r4 /* (delay slot) */
| .endr
| mov.l @r15+,r5 /* restore r5 */
| mov.l @r15+,r4 /* restore r4 */
| jmp @r1 /* return via the address saved in r1 */
| rotcl r0 /* (delay slot) rotate the last T bit into r0 */
|
| .global __sdivsi3_i4i
| .global __sdivsi3_i4
| .global __sdivsi3
| .set __sdivsi3_i4, __sdivsi3_i4i /* __sdivsi3_i4 is an alias of __sdivsi3_i4i */
| .set __sdivsi3, __sdivsi3_i4i /* __sdivsi3 is an alias of __sdivsi3_i4i */
| __sdivsi3_i4i: /* Signed 32-bit division of r4 (dividend) by r5 (divisor), result in r0.
| Negative operands are negated, then control transfers to the unsigned
| code at sdiv_small_divisor or sdiv_large_divisor, which pops r4 and r5
| and leaves through jmp @r1.
| If exactly one operand was negative, r1 is set to negate_result instead
| of the return address; the return address is then held in r2 and the
| incoming r2 is kept in macl until the exit. */
| mov.l r4,@-r15 /* push r4; popped by the unsigned exit code */
| cmp/pz r5 /* T = 1 iff divisor >= 0 */
| mov.l r5,@-r15 /* push r5; popped by the unsigned exit code */
| bt/s pos_divisor
| cmp/pz r4 /* (delay slot, runs on both paths) T = 1 iff dividend >= 0 */
| neg r5,r5 /* divisor was negative: r5 = -r5 */
| extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
| bt/s neg_result /* dividend >= 0 with a negative divisor: result is negated */
| cmp/eq r5,r0 /* (delay slot) T = 1 iff r5 fits in 16 bits */
| neg r4,r4 /* both operands negative: r4 = -r4, no negation of the result */
| pos_result:
| swap.w r4,r0 /* r0 = r4 with its 16-bit halves swapped */
| bra sdiv_check_divisor
| sts pr,r1 /* (delay slot) r1 = return address */
| pos_divisor:
| extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
| bt/s pos_result /* dividend >= 0 as well: no negation of the result */
| cmp/eq r5,r0 /* (delay slot) T = 1 iff r5 fits in 16 bits */
| neg r4,r4 /* dividend negative: r4 = -r4, then fall into neg_result */
| neg_result:
| mova negate_result,r0 /* r0 = address of negate_result (PC-relative) */
| ; /* NOTE(review): bare statement separator, appears to assemble to nothing - confirm */
| mov r0,r1 /* the unsigned code ends with jmp @r1, so it lands in negate_result */
| swap.w r4,r0 /* r0 = r4 with its 16-bit halves swapped */
| lds r2,macl /* keep the incoming r2 in macl */
| sts pr,r2 /* r2 = return address */
| sdiv_check_divisor:
| shlr16 r4 /* r4 = upper 16 bits of the (possibly negated) dividend */
| bf/s sdiv_large_divisor /* T is still the cmp/eq result: 0 means r5 needs more than 16 bits */
| div0u /* (delay slot) clear M, Q and T for unsigned div1 stepping */
| bra sdiv_small_divisor
| shll16 r5 /* (delay slot) move the 16-bit divisor into the upper half of r5 */
| .balign 4 /* keeps negate_result 4-byte aligned, as the mova above requires */
| negate_result:
| neg r0,r0 /* r0 = -r0 */
| jmp @r2 /* return via the address saved in r2 */
| sts macl,r2 /* (delay slot) restore r2 from macl */
|
|