1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
| /* memcpy.S: optimised assembly memcpy
| *
| * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
| * Written by David Howells (dhowells@redhat.com)
| *
| * This program is free software; you can redistribute it and/or
| * modify it under the terms of the GNU General Public License
| * as published by the Free Software Foundation; either version
| * 2 of the License, or (at your option) any later version.
| */
|
|
| .text
| .p2align 4
|
| ###############################################################################
| #
| # void *memcpy(void *to, const char *from, size_t count)
| #
| # - NOTE: must not use any stack. exception detection performs function return
| # to caller's fixup routine, aborting the remainder of the copy
| #
| # FRV ABI: GR8 = to, GR9 = from, GR10 = count; returns "to" (GR8 untouched)
| # ".p"-suffixed insns are VLIW-packed with the following instruction
| #
| ###############################################################################
| .globl memcpy,__memcpy_end
| .type memcpy,@function
| memcpy:
| or.p gr8,gr9,gr4 ; GR4 = to | from (gather misalignment bits)
| orcc gr10,gr0,gr0,icc3 ; test count against zero
| or.p gr10,gr4,gr4 ; GR4 |= count
| beqlr icc3,#0 ; count == 0: return immediately
|
| # optimise based on best common alignment for to, from & count
| # (GR4 low bits are the OR of all three, so they bound the usable step)
| andicc.p gr4,#0x0f,gr0,icc0 ; all 16-byte multiples?
| setlos #8,gr11 ; preset step = 8 (used by memcpy_8/_16)
| andicc.p gr4,#0x07,gr0,icc1 ; all 8-byte multiples?
| beq icc0,#0,memcpy_16
| andicc.p gr4,#0x03,gr0,icc0 ; all 4-byte multiples?
| beq icc1,#0,memcpy_8
| andicc.p gr4,#0x01,gr0,icc1 ; all 2-byte multiples?
| beq icc0,#0,memcpy_4
| setlos.p #1,gr11 ; fall through to byte loop: step = 1
| beq icc1,#0,memcpy_2
|
| # do byte by byte copy
| # (all loops below pre-bias the pointers by one step, then use the
| # update-form load/store @(base,GR11) which advances base by GR11 first)
| sub.p gr8,gr11,gr3 ; GR3 = to - step (biased dest pointer)
| sub gr9,gr11,gr9 ; GR9 = from - step (biased src pointer)
| 0: ldubu.p @(gr9,gr11),gr4 ; GR4 = *(GR9 += 1)
| subicc gr10,#1,gr10,icc0 ; count -= 1
| stbu.p gr4,@(gr3,gr11) ; *(GR3 += 1) = GR4
| bne icc0,#2,0b ; loop until count == 0 (#2 = branch hint)
| bralr
|
| # do halfword by halfword copy
| memcpy_2:
| setlos #2,gr11 ; step = 2
| sub.p gr8,gr11,gr3
| sub gr9,gr11,gr9
| 0: lduhu.p @(gr9,gr11),gr4 ; GR4 = *(GR9 += 2)
| subicc gr10,#2,gr10,icc0 ; count -= 2
| sthu.p gr4,@(gr3,gr11) ; *(GR3 += 2) = GR4
| bne icc0,#2,0b
| bralr
|
| # do word by word copy
| memcpy_4:
| setlos #4,gr11 ; step = 4
| sub.p gr8,gr11,gr3
| sub gr9,gr11,gr9
| 0: ldu.p @(gr9,gr11),gr4 ; GR4 = *(GR9 += 4)
| subicc gr10,#4,gr10,icc0 ; count -= 4
| stu.p gr4,@(gr3,gr11) ; *(GR3 += 4) = GR4
| bne icc0,#2,0b
| bralr
|
| # do double-word by double-word copy (step of 8 preset above)
| memcpy_8:
| sub.p gr8,gr11,gr3
| sub gr9,gr11,gr9
| 0: lddu.p @(gr9,gr11),gr4 ; GR4:GR5 = *(GR9 += 8)
| subicc gr10,#8,gr10,icc0 ; count -= 8
| stdu.p gr4,@(gr3,gr11) ; *(GR3 += 8) = GR4:GR5
| bne icc0,#2,0b
| bralr
|
| # do quad-word by quad-word copy (two dword moves per pass, step still 8)
| memcpy_16:
| sub.p gr8,gr11,gr3
| sub gr9,gr11,gr9
| 0: lddu @(gr9,gr11),gr4 ; GR4:GR5 = *(GR9 += 8)
| lddu.p @(gr9,gr11),gr6 ; GR6:GR7 = *(GR9 += 8)
| subicc gr10,#16,gr10,icc0 ; count -= 16
| stdu gr4,@(gr3,gr11) ; *(GR3 += 8) = GR4:GR5
| stdu.p gr6,@(gr3,gr11) ; *(GR3 += 8) = GR6:GR7
| bne icc0,#2,0b
| bralr
| __memcpy_end: ; end marker: faults inside [memcpy,__memcpy_end) are fixable
|
| .size memcpy, __memcpy_end-memcpy
|
| ###############################################################################
| #
| # copy to/from userspace
| # - return the number of bytes that could not be copied (0 on complete success)
| #
| # long __memcpy_user(void *dst, const void *src, size_t count)
| #
| # - a fault inside memcpy diverts control to __memcpy_user_error_handler
| # (presumably matched via the __memcpy_user_error_lr return address by the
| # kernel's exception fixup tables — defined elsewhere; confirm against those)
| #
| ###############################################################################
| .globl __memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
| .type __memcpy_user,@function
| __memcpy_user:
| movsg lr,gr7 ; GR7 = return address (memcpy itself uses no stack)
| subi.p sp,#8,sp
| add gr8,gr10,gr6 ; calculate expected end address
| stdi gr6,@(sp,#0) ; save pair: [sp+0] = dst+count, [sp+4] = lr
|
| # abuse memcpy to do the dirty work
| call memcpy
| __memcpy_user_error_lr: ; label on memcpy's return address, for fixup matching
| ldi.p @(sp,#4),gr7 ; reload saved lr
| setlos #0,gr8 ; complete success: 0 bytes uncopied
| jmpl.p @(gr7,gr0) ; return to caller...
| addi sp,#8,sp ; ...popping the save area in the packed slot
|
| # deal with any exception generated by memcpy
| # GR3 - memcpy's dest pointer (one step behind the next store address)
| # GR11 - memcpy's step value (index register for store insns)
| __memcpy_user_error_handler:
| lddi.p @(sp,#0),gr4 ; load GR4 with dst+count, GR5 with ret addr
| add gr11,gr3,gr7 ; GR7 = next store address (~ first byte not copied)
| sub.p gr4,gr7,gr8 ; GR8 = (dst+count) - GR7 = bytes left uncopied
|
| addi sp,#8,sp ; discard the save area
| jmpl @(gr5,gr0) ; return to caller with the remainder in GR8
|
| .size __memcpy_user, .-__memcpy_user
|
|