1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
| /* SPDX-License-Identifier: GPL-2.0 */
| /*
| * strlen() for PPC32
| *
| * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
| *
| * Inspired by the glibc implementation
| */
| #include <asm/ppc_asm.h>
| #include <asm/export.h>
| #include <asm/cache.h>
|
| .text
|
| /*
| * Algorithm:
| *
| * 1) Given a word 'x', we can test to see if it contains any 0 bytes
| * by subtracting 0x01010101, and seeing if any of the high bits of each
| * byte changed from 0 to 1. This works because the least significant
| * 0 byte must have had no incoming carry (otherwise it's not the least
| * significant), so it is 0x00 - 0x01 == 0xff. For all other
| * byte values, either they have the high bit set initially, or when
| * 1 is subtracted you get a value in the range 0x00-0x7f, none of which
| * have their high bit set. The expression here is
| * ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
| * there were no 0x00 bytes in the word. You get 0x80 in bytes that
| * match, but possibly false 0x80 matches in the next more significant
| * byte to a true match due to carries. For little-endian this is
| * of no consequence since the least significant match is the one
| * we're interested in, but big-endian needs method 2 to find which
| * byte matches.
| * 2) Given a word 'x', we can test to see _which_ byte was zero by
| * calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
| * This produces 0x80 in each byte that was zero, and 0x00 in all
| * the other bytes. The '| ~0x80808080' sets the low 7 bits in each
| * byte so that the final '~' clears them, and the '| x' part ensures
| * that bytes with the high bit set produce 0x00. Subtracting
| * 0x80808080 + 1 is the same as adding 0x7f7f7f7f (modulo 2^32), and
| * that addition carries into the high bit of each byte
| * iff that byte had one of its low 7 bits set. We can then just see
| * which was the most significant bit set and divide by 8 to find how
| * many to add to the index.
| * This is from the book 'The PowerPC Compiler Writer's Guide',
| * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
| */
|
| _GLOBAL(strlen) /*
| * strlen: on entry r3 = address of a NUL-terminated string; on return
| * r3 = number of bytes that precede the first 0x00 byte.
| *
| * Register use:
| * r0 = misalignment of the string (r3 & 3), later scaled to bits
| * r6 = 0x80808080 (himagic), r7 = 0x01010101 (lomagic)
| * r8 = scratch for the zero-byte tests
| * r9 = current 32-bit word of the string
| * r10 = address of the current word (starts 4 bytes early for lwzu)
| * CR0 and XER[CA] are modified as well.
| *
| * The string is scanned one aligned word at a time. Local labels:
| * 1 = load the next word, 2 = test the word already in r9,
| * 3 = entry fix-up for a string that is not word aligned.
| * The byte index is taken from a leading-zero count, so the first byte
| * in memory is treated as the most significant byte of the word.
| */
| andi. r0, r3, 3 /* r0 = r3 & 3; CR0 is 'ne' when the string is not word aligned */
| lis r7, 0x0101 /* r7 = 0x01010000 */
| addi r10, r3, -4 /* r10 = s - 4, so the first lwzu (which adds 4) reads at s */
| addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */
| rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */
| bne- 3f /* misaligned start (hinted as unlikely): prepare the first word at 3 */
| .balign IFETCH_ALIGN_BYTES /* align the start of the scan loop */
| 1: lwzu r9, 4(r10) /* r10 += 4; r9 = word at r10 */
| 2: subf r8, r7, r9 /* r8 = word - 0x01010101 */
| and. r8, r8, r6 /* keep only the high bit of each byte */
| beq+ 1b /* no high bit set: the word has no zero byte, load the next one */
| andc. r8, r8, r9 /* drop high bits that were already set in the word itself */
| beq+ 1b /* nothing left: the word has no zero byte, load the next one */
| andc r8, r9, r6 /* a zero byte is present; r8 = word & 0x7f7f7f7f */
| orc r9, r9, r6 /* r9 = word | 0x7f7f7f7f */
| subfe r8, r6, r8 /* r8 = r8 + ~r6 + CA = r8 + 0x7f7f7f7f (CA is still 0 from addic) */
| nor r8, r8, r9 /* r8 = 0x80 in each byte that was zero, 0x00 in the others */
| cntlzw r8, r8 /* bit position, counted from the MSB, of the first marker */
| subf r3, r3, r10 /* r3 = r10 - s: offset of this word from the string start */
| srwi r8, r8, 3 /* bits -> bytes: index of the zero byte inside the word */
| add r3, r3, r8 /* length = word offset + byte index */
| blr
|
| /* Misaligned string: make sure bytes before the string are not seen as 0 */
| 3: xor r10, r10, r0 /* clear the two low address bits: r10 = (s rounded down to a word) - 4 */
| orc r8, r8, r8 /* r8 = 0xffffffff */
| lwzu r9, 4(r10) /* r10 += 4; r9 = aligned word that contains the first string byte */
| slwi r0, r0, 3 /* r0 = number of bits in that word that precede the string */
| srw r8, r8, r0 /* r8 = mask with the r0 most significant bits cleared */
| orc r9, r9, r8 /* force the bytes before the string to 0xff so they cannot match */
| b 2b /* rejoin the loop at the zero-byte test */
| EXPORT_SYMBOL(strlen)
|
|