movtopush64.ll
8.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=NORMAL -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=NOPUSH
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=NOPUSH -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
; External callees for the tests in this file. Only their signatures are
; declared here; the tests call them with more arguments than the six integer
; (or eight floating-point) argument registers of the SysV x86-64 convention
; can hold, so the trailing arguments are passed on the stack.

; Seven alternating i32/i64 parameters.
declare void @seven_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g)
; Eight parameters of one integer width each: i32, i16 and i64 respectively.
declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
declare void @eightparams16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h)
declare void @eightparams64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
; Ten alternating i32/i64 parameters. The _ptr variant differs only in taking
; an i8* as its eighth parameter.
declare void @ten_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i64 %h, i32 %i, i64 %j)
declare void @ten_params_ptr(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i8* %h, i32 %i, i64 %j)
; Nine float parameters. Called by test9 and test10 as the call that passes a
; stack argument without pushes.
; NOTE(review): presumably not pushable because the stack argument is a
; float - confirm against the call-frame optimization.
declare void @cannot_push(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i)
; We should get pushes for the last 4 parameters. Test that the
; in-register parameters are all in the right places, and check
; that the stack manipulations are correct and correctly
; described by the DWARF directives. Test that the switch
; to disable the optimization works and that the optimization
; doesn't kick in on Windows64 where it is not allowed.
; NORMAL-LABEL: test1
; NORMAL: pushq
; NORMAL-DAG: movl $1, %edi
; NORMAL-DAG: movl $2, %esi
; NORMAL-DAG: movl $3, %edx
; NORMAL-DAG: movl $4, %ecx
; NORMAL-DAG: movl $5, %r8d
; NORMAL-DAG: movl $6, %r9d
; NORMAL: pushq $10
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $9
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $8
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $7
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: callq ten_params
; NORMAL: addq $32, %rsp
; NORMAL: .cfi_adjust_cfa_offset -32
; NORMAL: popq
; NORMAL: retq
; NOPUSH-LABEL: test1
; NOPUSH-NOT: pushq
; NOPUSH: retq
; Calls @ten_params with ten integer constants. The FileCheck lines preceding
; this function expect constants 1-6 in the argument registers and constants
; 10, 9, 8, 7 (in that order) pushed onto the stack.
define void @test1() {
entry:
; Arguments 7-10 are the ones that do not fit in the six integer registers.
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
ret void
}
; The presence of a frame pointer should not prevent pushes. But we
; don't need the CFI directives in that case.
; Also check that we generate the right pushes for >8bit immediates.
; NORMALFP-LABEL: test2
; NORMALFP: pushq $10000
; NORMALFP-NEXT: pushq $9000
; NORMALFP-NEXT: pushq $8000
; NORMALFP-NEXT: pushq $7000
; NORMALFP-NEXT: callq {{_?}}ten_params
; Same call shape as test1, but with stack-argument constants (7000-10000)
; that are too large for an 8-bit immediate.
define void @test2(i32 %k) {
entry:
; Variable-sized alloca whose result is never used.
; NOTE(review): presumably here only to force a frame pointer, matching the
; comment that introduces this test - confirm.
%a = alloca i32, i32 %k
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7000, i64 8000, i32 9000, i64 10000)
ret void
}
; Parameters 7 & 8 should push a 64-bit register.
; TODO: Note that the regular expressions disallow r8 and r9. That's fine for
; now, because the pushes will always follow the moves into r8 and r9.
; Eventually, though, we want to be able to schedule the pushes better.
; In this example, better scheduling would save two copies, because we
; currently have to move the incoming parameters out of %rdi and %rsi to
; make room for the outgoing parameters.
; NORMAL-LABEL: test3
; NORMAL: pushq $10000
; NORMAL: pushq $9000
; NORMAL: pushq %r{{..}}
; NORMAL: pushq %r{{..}}
; NORMAL: callq ten_params
; Forwards the incoming parameters %a and %b as outgoing arguments 7 and 8,
; so two of the four stack arguments come from registers rather than from
; immediates. Arguments 9 and 10 remain constants.
define void @test3(i32 %a, i64 %b) {
entry:
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %a, i64 %b, i32 9000, i64 10000)
ret void
}
; Check that we avoid the optimization for just one push.
; NORMAL-LABEL: test4
; NORMAL: movl $7, (%rsp)
; NORMAL: callq seven_params
; Calls @seven_params, where only the seventh argument is passed on the
; stack. The FileCheck lines preceding this function expect it to be stored
; with a movl to (%rsp) instead of being pushed.
define void @test4() {
entry:
call void @seven_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7)
ret void
}
; Check that pushing link-time constant addresses works correctly.
; NORMAL-LABEL: test5
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $ext
; NORMAL: pushq $7
; NORMAL: callq ten_params_ptr
; External constant whose address is passed as a stack argument below.
@ext = external constant i8
; Calls @ten_params_ptr with the address of @ext as the eighth argument; the
; other stack arguments (7, 9, 10) are plain integer constants.
define void @test5() {
entry:
call void @ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i8* @ext, i32 9, i64 10)
ret void
}
; Check that we fuse 64-bit loads but not 32-bit loads into PUSH mem.
; NORMAL-LABEL: test6
; NORMAL: movq %rsi, [[REG64:%.+]]
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq ([[REG64]])
; NORMAL: pushq {{%r..}}
; NORMAL: callq ten_params
; Loads one i32 and one i64 through the pointer parameters and passes them as
; stack arguments 7 and 8. The FileCheck lines preceding this function expect
; the i64 to be pushed straight from memory and the i32 to be pushed from a
; register.
define void @test6(i32* %p32, i64* %p64) {
entry:
; Seventh argument: 32-bit load.
%v32 = load i32, i32* %p32
; Eighth argument: 64-bit load.
%v64 = load i64, i64* %p64
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %v32, i64 %v64, i32 9, i64 10)
ret void
}
; Fold stack-relative loads into the push with correct offsets.
; Do the same for an indirect call whose address is loaded from the stack.
; On entry, %p7 is at 8(%rsp) and %p8 is at 16(%rsp). Prior to the call
; sequence, 72 bytes are allocated on the stack, 48 for register saves and
; 24 for local storage and alignment, so %p7 is at 80(%rsp) and %p8 is at
; 88(%rsp). The call address can be stored anywhere in the local space but
; happens to be stored at 8(%rsp). Each push bumps these offsets up by
; 8 bytes.
; NORMAL-LABEL: test7
; NORMAL: movq %r{{.*}}, 8(%rsp) {{.*Spill$}}
; NORMAL: pushq 88(%rsp)
; NORMAL: pushq $9
; NORMAL: pushq 96(%rsp)
; NORMAL: pushq $7
; NORMAL: callq *40(%rsp)
; Makes an indirect call through a function pointer read from a stack slot,
; passing the incoming seventh and eighth parameters (%p7, %p8) as outgoing
; stack arguments 8 and 10. With eight i64 parameters, %p7 and %p8 are
; themselves stack-passed on entry.
define void @test7(i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8) {
entry:
; Stack slot holding a pointer to a function with @ten_params' signature.
%stack_fptr = alloca void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*
store void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)* @ten_params, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
; Volatile load of the call target from that slot.
%ten_params_ptr = load volatile void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
; A "nop" whose clobber list names ax, bx, cx, dx, bp, si, di and r8-r15.
; NOTE(review): presumably this leaves no register free for the loaded call
; target, forcing the spill that the checks before this function expect - confirm.
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Indirect call: %p7 is outgoing argument 8 and %p8 is outgoing argument 10.
call void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64) %ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %p7, i32 9, i64 %p8)
ret void
}
; We can't fold the load from the global into the push because a store to
; the same global sits between the load and the call.
; NORMAL-LABEL: test8
; NORMAL: movq the_global(%rip), [[REG:%r.+]]
; NORMAL: movq $42, the_global
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq [[REG]]
; NORMAL: pushq $7
; NORMAL: callq ten_params
; External mutable global that is both read and written by test8.
@the_global = external global i64
; Loads @the_global, then overwrites it, then passes the previously loaded
; value as stack argument 8. The value passed must be the one read before
; the store.
define void @test8() {
; Read the old value first ...
%myload = load i64, i64* @the_global
; ... then store to the same global before the call uses the loaded value.
store i64 42, i64* @the_global
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %myload, i32 9, i64 10)
ret void
}
; Converting one function call to use pushes negatively affects
; other calls that pass arguments on the stack without pushes.
; If the cost outweighs the benefit, avoid using pushes.
; NORMAL-LABEL: test9
; NORMAL: callq cannot_push
; NORMAL-NOT: push
; NORMAL: callq ten_params
; One call to @ten_params sandwiched between two calls to @cannot_push.
; The FileCheck lines preceding this function expect no push instructions
; between the first @cannot_push call and the @ten_params call.
define void @test9(float %p1) {
; Nine float arguments; the ninth is the incoming %p1.
call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
ret void
}
; But if the benefit outweighs the cost, use pushes.
; NORMAL-LABEL: test10
; NORMAL: callq cannot_push
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $8
; NORMAL: pushq $7
; NORMAL: callq ten_params
; Mirror image of test9: one call to @cannot_push sandwiched between two
; calls to @ten_params. The FileCheck lines preceding this function expect
; the @ten_params call that follows @cannot_push to use pushes for
; arguments 7-10.
define void @test10(float %p1) {
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
; Nine float arguments; the ninth is the incoming %p1.
call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
ret void
}
; NORMAL-LABEL: pr34863_16
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
; 16-bit variant: a minsize function that passes %x six times, followed by
; the constants 0 and -1 as the seventh and eighth arguments. The FileCheck
; lines preceding this function expect the -1 (accepted as either -1 or
; 65535) and the 0 to be pushed immediately before the call.
; NOTE(review): the name suggests a regression test for PR34863 - confirm.
define void @pr34863_16(i16 %x) minsize nounwind {
entry:
tail call void @eightparams16(i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 0, i16 -1)
ret void
}
; NORMAL-LABEL: pr34863_32
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
; 32-bit variant: a minsize function that passes %x six times, followed by
; the constants 0 and -1 as the seventh and eighth arguments. The FileCheck
; lines preceding this function expect both constants to be pushed
; immediately before the call.
; NOTE(review): the name suggests a regression test for PR34863 - confirm.
define void @pr34863_32(i32 %x) minsize nounwind {
entry:
tail call void @eightparams(i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 0, i32 -1)
ret void
}
; NORMAL-LABEL: pr34863_64
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
; 64-bit variant: a minsize function that passes %x six times, followed by
; the constants 0 and -1 as the seventh and eighth arguments. The FileCheck
; lines preceding this function expect both constants to be pushed
; immediately before the call.
; NOTE(review): the name suggests a regression test for PR34863 - confirm.
define void @pr34863_64(i64 %x) minsize nounwind {
entry:
tail call void @eightparams64(i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 0, i64 -1)
ret void
}