1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
| /* SPDX-License-Identifier: GPL-2.0 */
| /*
| * "memset" implementation for SH4
| *
| * Copyright (C) 1999 Niibe Yutaka
| * Copyright (c) 2009 STMicroelectronics Limited
| * Author: Stuart Menefy <stuart.menefy:st.com>
| */
|
| /*
| * void *memset(void *s, int c, size_t n);
| */
|
| #include <linux/linkage.h>
| /*
| * memset: fill the n bytes starting at s with the low byte of c and
| * return s.
| *
| * Registers as used by the code below:
| *   r4     destination pointer. It is first advanced to s + n and every
| *          store pre-decrements it, so the buffer is filled from its end
| *          towards its start and r4 finishes back at s.
| *   r5     fill value. Byte stores write its low byte. Before the first
| *          longword store it is rebuilt as that byte repeated four times.
| *   r6     number of bytes still to be written.
| *   r0-r3  scratch. r0 also carries the return value.
| *
| * Stages:
| *   - n below 12: byte loop only (labels 40 and 4).
| *   - byte stores until r4 is 4-byte aligned (label 1), then build the
| *     replicated fill pattern (label 2).
| *   - if at least 64 bytes remain: longword stores down to a 32-byte
| *     boundary (label 10), then whole 32-byte blocks (label 12), each
| *     block started with movca.l (the SH-4 store with cache block
| *     allocation).
| *   - pairs of longword stores, 8 bytes per pass (labels 22 and 3).
| *   - byte loop for whatever is left (labels 40 and 4).
| *
| * The instruction that follows bt/s, bf/s or rts sits in the branch
| * delay slot and executes whether or not the branch is taken.
| *
| * NOTE(review): the length tests use cmp/gt and cmp/ge, which are signed
| * compares, so a length with bit 31 set would take the byte loop.
| */
| ENTRY(memset)
| mov #12,r0  /* r0 = 12, the threshold for the byte-only path */
| add r6,r4  /* r4 = s + n, one past the last byte to write */
| cmp/gt r6,r0  /* T = (12 > n) */
| bt/s 40f ! if it's too small, set a byte at once
| mov r4,r0  /* delay slot: r0 = end pointer, input to the alignment test */
| and #3,r0  /* r0 = number of bytes above the last 4-byte boundary */
| cmp/eq #0,r0
| bt/s 2f ! It's aligned
| sub r0,r6  /* delay slot: take those bytes off the count (no-op when r0 is 0) */
| 1:  /* store r0 (1 to 3) single bytes so that r4 becomes 4-byte aligned */
| dt r0
| bf/s 1b
| mov.b r5,@-r4  /* delay slot: runs on every pass, including the last one */
| 2: ! make VVVV
| extu.b r5,r5  /* keep only the low byte of the fill value */
| swap.b r5,r0 ! V0
| or r0,r5 ! VV
| swap.w r5,r0 ! VV00
| or r0,r5 ! VVVV
|
| ! Check if enough bytes need to be copied to be worth the big loop
| mov #0x40, r0 ! (MT)
| cmp/gt r6,r0 ! (MT) 64 > len => slow loop
| /* fewer than 64 bytes left: skip the 32-byte block loop, go to the 8-byte loop */
| bt/s 22f
| mov r6,r0  /* delay slot: r0 = remaining byte count, consumed at label 22 */
|
| ! align the dst to the cache block size if necessary
| mov r4, r3
| mov #~(0x1f), r1
|
| and r3, r1  /* r1 = r4 rounded down to a 32-byte boundary */
| cmp/eq r3, r1
|
| bt/s 11f ! dst is already aligned
| sub r1, r3 ! r3-r1 -> r3
| shlr2 r3 ! number of loops
| /* r3 = longwords between the 32-byte boundary and r4; store them one at a time */
| 10: mov.l r5,@-r4
| dt r3
| bf/s 10b
| add #-4, r6  /* delay slot: account for the longword just stored */
|
| 11: ! dst is 32byte aligned
| mov r6,r2
| mov #-5,r0  /* a negative shld count shifts right */
| shld r0,r2 ! number of loops
| /* r2 = r6 / 32 blocks. r4 is stepped down to the base of each block before it is filled, and the fill pattern is copied to r0 for movca.l */
| add #-32, r4
| mov r5, r0
| 12:  /* fill one 32-byte block per pass, r4 = base address of the block */
| movca.l r0,@r4
| mov.l r5,@(4, r4)
| mov.l r5,@(8, r4)
| mov.l r5,@(12,r4)
| mov.l r5,@(16,r4)
| mov.l r5,@(20,r4)
| add #-0x20, r6  /* 32 bytes accounted for */
| mov.l r5,@(24,r4)
| dt r2
| mov.l r5,@(28,r4)
| bf/s 12b
| add #-32, r4  /* delay slot: step down to the next block, also runs after the last one */
|
| add #32, r4  /* undo that final step: r4 = lowest address written so far */
| mov #8, r0
| cmp/ge r0, r6  /* T = (r6 >= 8) */
| bf 40f  /* fewer than 8 bytes left: straight to the byte loop */
|
| mov r6,r0
| 22:  /* r0 = bytes remaining, at least 8 on both paths that reach here */
| shlr2 r0
| shlr r0 ! r0 = r6 >> 3
| 3:
| dt r0
| mov.l r5,@-r4 ! set 8-byte at once
| bf/s 3b
| mov.l r5,@-r4  /* delay slot: second longword of the pair */
| !
| mov #7,r0
| and r0,r6  /* r6 = bytes left over after the 8-byte passes */
|
| ! fill bytes (length may be zero)
| 40: tst r6,r6
| bt 5f
| 4:
| dt r6
| bf/s 4b
| mov.b r5,@-r4  /* delay slot: one byte per pass, including the last one */
| 5:
| rts
| mov r4,r0  /* delay slot: r4 is back at s, which is the return value */
|
|