/*
 * memset - fill memory with a constant
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/*
 * Written by Dave Gilbert <david.gilbert@linaro.org>
 *
 * This memset routine is optimised on a Cortex-A9 and should work on
 * all ARMv7 processors.
 */
	.syntax unified
	.arch armv7-a

@ 2011-08-30 david.gilbert@linaro.org
@ Extracted from local git 2f11b436

@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
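@ For example, CHARTSTMASK(0) is 0x00000001 on little-endian and 0x80000000 on
@ big-endian: either bit lies inside byte 0 of the word, so a tst against it
@ reveals whether that byte is 0x00 or 0xff.  (The macro is not referenced by
@ the memset routine below.)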
	.text
	.thumb

@ ---------------------------------------------------------------------------
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memset_arm
	.type __memset_arm,%function
__memset_arm:
	@ r0 = address
	@ r1 = character
	@ r2 = count
	@ returns original address in r0
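	@ C-level view (arguments in r0-r2 per the AAPCS):
	@   void *__memset_arm(void *s, int c, size_t n);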
	mov	r3, r0		@ Leave r0 alone
	cbz	r2, 10f		@ Exit if 0 length

	tst	r0, #7
	beq	2f		@ Already aligned

	@ Ok, so we're misaligned here
1:
	strb	r1, [r3], #1
	subs	r2, r2, #1
	tst	r3, #7
	cbz	r2, 10f		@ Exit if we hit the end
	bne	1b		@ go round again if still misaligned
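	@ For example, with a destination of 0x2005 the loop above stores bytes at
	@ 0x2005-0x2007; r3 then reaches 0x2008, the tst sets the Z flag, and
	@ execution falls through to 2: (or exits early via the cbz if the count
	@ runs out first).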
2:
	@ OK, so we're aligned
	push	{r4, r5, r6, r7}
	bics	r4, r2, #15	@ if less than 16 bytes then need to finish it off
	beq	5f

3:
	@ POSIX says that ch is cast to an unsigned char.  A uxtb is two
	@ bytes and takes two cycles, whereas an AND is four bytes but one
	@ cycle.
	and	r1, #0xFF
	orr	r1, r1, r1, lsl#8	@ Same character into all bytes
	orr	r1, r1, r1, lsl#16
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
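	@ For example, with c = 'A' (0x41): the and leaves r1 = 0x00000041, the two
	@ orrs turn it into 0x00004141 and then 0x41414141, and r5-r7 hold the same
	@ pattern, so each stmia below writes 16 identical bytes.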
4:
	subs	r4, r4, #16
	stmia	r3!, {r1, r5, r6, r7}
	bne	4b
	and	r2, r2, #15

	@ At this point we're still aligned and we have up to align-1 (15) bytes left to write.
	@ We can avoid some of the byte-at-a-time work by testing for some big chunks.
	tst	r2, #8
	itt	ne
	subne	r2, r2, #8
	stmiane	r3!, {r1, r5}
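	@ For example, with 13 bytes remaining the conditional stm above stores 8 of
	@ them and leaves r2 = 5 for the byte loop at 6:.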
5:
	pop	{r4, r5, r6, r7}
	cbz	r2, 10f

	@ Got to do any last < alignment bytes
6:
	subs	r2, r2, #1
	strb	r1, [r3], #1
	bne	6b

10:
	bx	lr		@ goodbye
	.size	__memset_arm, . - __memset_arm