; loop-blocks.ll
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
; These tests check for loop branching structure, and that the loop align
; directive is placed in the expected place.
; CodeGen is expected to keep the loop header at the top of the loop, right
; after the align directive, with a conditional branch leaving the loop and
; the latch jumping back to the header.
; CHECK-LABEL: simple:
; CHECK: align
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: callq loop_header
; CHECK: js .LBB0_3
; CHECK-NEXT: callq loop_latch
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: callq exit
; @simple: a single loop whose header (%loop) can leave the loop to %done and
; whose latch (%bb) branches unconditionally back to the header.
define void @simple() nounwind {
entry:
br label %loop
; Loop header: leaves the loop when @get returns a negative value.
loop:
call void @loop_header()
%t0 = tail call i32 @get()
%t1 = icmp slt i32 %t0, 0
br i1 %t1, label %done, label %bb
; Loop latch: calls @loop_latch and takes the back edge to %loop.
bb:
call void @loop_latch()
br label %loop
; Loop exit.
done:
call void @exit()
ret void
}
; CodeGen should move block_a to the top of the loop so that it
; falls through into the loop, avoiding a branch within the loop.
; CHECK-LABEL: slightly_more_involved:
; CHECK: jmp .LBB1_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: callq bar99
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: callq body
; @slightly_more_involved: a loop with two back edges into the header %loop,
; one from %bb and one from %block_a; %bb can also leave the loop to %exit.
define void @slightly_more_involved() nounwind {
entry:
br label %loop
; Loop header: goes to %block_a when @get returns a value below 2 (signed),
; otherwise to %bb.
loop:
call void @body()
%t0 = call i32 @get()
%t1 = icmp slt i32 %t0, 2
br i1 %t1, label %block_a, label %bb
; Leaves the loop when a second @get result is below 99; otherwise branches
; back to the header.
bb:
%t2 = call i32 @get()
%t3 = icmp slt i32 %t2, 99
br i1 %t3, label %exit, label %loop
; Side block inside the loop: calls @bar99 and returns to the header.
block_a:
call void @bar99()
br label %loop
; Loop exit.
exit:
call void @exit()
ret void
}
; Same as slightly_more_involved, but block_a is now a CFG diamond with
; fallthrough edges which should be preserved.
; "callq block_a_merge_func" is tail-duplicated into both arms of the diamond.
; CHECK-LABEL: yet_more_involved:
; CHECK: jmp .LBB2_1
; CHECK-NEXT: align
; CHECK: .LBB2_1:
; CHECK-NEXT: callq body
; CHECK-NEXT: callq get
; CHECK-NEXT: cmpl $2, %eax
; CHECK-NEXT: jge .LBB2_2
; CHECK-NEXT: callq bar99
; CHECK-NEXT: callq get
; CHECK-NEXT: cmpl $2999, %eax
; CHECK-NEXT: jg .LBB2_6
; CHECK-NEXT: callq block_a_true_func
; CHECK-NEXT: callq block_a_merge_func
; CHECK-NEXT: jmp .LBB2_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB2_6:
; CHECK-NEXT: callq block_a_false_func
; CHECK-NEXT: callq block_a_merge_func
; CHECK-NEXT: jmp .LBB2_1
; @yet_more_involved: same loop skeleton as @slightly_more_involved, except
; that %block_a opens a diamond (%block_a_true / %block_a_false) which rejoins
; in %block_a_merge before branching back to the header.
define void @yet_more_involved() nounwind {
entry:
br label %loop
; Loop header: goes to %block_a when @get returns a value below 2 (signed),
; otherwise to %bb.
loop:
call void @body()
%t0 = call i32 @get()
%t1 = icmp slt i32 %t0, 2
br i1 %t1, label %block_a, label %bb
; Leaves the loop when a second @get result is below 99; otherwise branches
; back to the header.
bb:
%t2 = call i32 @get()
%t3 = icmp slt i32 %t2, 99
br i1 %t3, label %exit, label %loop
; Top of the diamond: picks the true arm when @get returns a value below 3000.
block_a:
call void @bar99()
%z0 = call i32 @get()
%z1 = icmp slt i32 %z0, 3000
br i1 %z1, label %block_a_true, label %block_a_false
; True arm of the diamond.
block_a_true:
call void @block_a_true_func()
br label %block_a_merge
; False arm of the diamond.
block_a_false:
call void @block_a_false_func()
br label %block_a_merge
; Join point of the diamond; branches back to the loop header.
block_a_merge:
call void @block_a_merge_func()
br label %loop
; Loop exit.
exit:
call void @exit()
ret void
}
; CodeGen should move the CFG islands that are part of the loop but don't
; conveniently fit anywhere so that they are at least contiguous with the
; loop.
; CHECK-LABEL: cfg_islands:
; CHECK: jmp .LBB3_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: callq bar100
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: callq loop_header
; CHECK: jl .LBB3_7
; CHECK: jge .LBB3_3
; CHECK-NEXT: callq bar101
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_3:
; CHECK: jge .LBB3_4
; CHECK-NEXT: callq bar102
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: .LBB3_4:
; CHECK: jl .LBB3_6
; CHECK-NEXT: callq loop_latch
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: .LBB3_6:
; @cfg_islands: a loop whose header and the chain %bb -> %bb1 -> %bb2 each
; test a fresh @get result. Three of those tests can divert to a small block
; (%block100, %block101, %block102) that calls one function and branches
; straight back to the header. Those three blocks are written after %exit in
; the IR.
define void @cfg_islands() nounwind {
entry:
br label %loop
; Loop header: diverts to %block100 when @get returns a value below 100.
loop:
call void @loop_header()
%t0 = call i32 @get()
%t1 = icmp slt i32 %t0, 100
br i1 %t1, label %block100, label %bb
; Diverts to %block101 when @get returns a value below 101.
bb:
%t2 = call i32 @get()
%t3 = icmp slt i32 %t2, 101
br i1 %t3, label %block101, label %bb1
; Diverts to %block102 when @get returns a value below 102.
bb1:
%t4 = call i32 @get()
%t5 = icmp slt i32 %t4, 102
br i1 %t5, label %block102, label %bb2
; Leaves the loop when @get returns a value below 103; otherwise continues to
; the latch.
bb2:
%t6 = call i32 @get()
%t7 = icmp slt i32 %t6, 103
br i1 %t7, label %exit, label %bb3
; Loop latch: calls @loop_latch and takes the back edge to %loop.
bb3:
call void @loop_latch()
br label %loop
; Loop exit.
exit:
call void @exit()
ret void
; The remaining blocks belong to the loop even though they follow %exit here:
; each one calls its function and branches back to the header.
block100:
call void @bar100()
br label %loop
block101:
call void @bar101()
br label %loop
block102:
call void @bar102()
br label %loop
}
; CHECK-LABEL: check_minsize:
; CHECK-NOT: align
; CHECK: .LBB4_1:
; CHECK-NEXT: callq loop_header
; CHECK: callq loop_latch
; CHECK: .LBB4_3:
; CHECK: callq exit
; @check_minsize: the body is identical to @simple; the only difference is the
; additional minsize function attribute.
define void @check_minsize() minsize nounwind {
entry:
br label %loop
; Loop header: leaves the loop when @get returns a negative value.
loop:
call void @loop_header()
%t0 = tail call i32 @get()
%t1 = icmp slt i32 %t0, 0
br i1 %t1, label %done, label %bb
; Loop latch: calls @loop_latch and takes the back edge to %loop.
bb:
call void @loop_latch()
br label %loop
; Loop exit.
done:
call void @exit()
ret void
}
; This is exactly the same function as slightly_more_involved.
; The difference is that when optimising for size, we do not want
; to see this reordering.
; CHECK-LABEL: slightly_more_involved_2:
; CHECK-NOT: jmp .LBB5_1
; CHECK: .LBB5_1:
; CHECK-NEXT: callq body
; @slightly_more_involved_2: the body matches @slightly_more_involved; the
; function attributes come from attribute group #0, defined right after the
; function.
define void @slightly_more_involved_2() #0 {
entry:
br label %loop
; Loop header: goes to %block_a when @get returns a value below 2 (signed),
; otherwise to %bb.
loop:
call void @body()
%t0 = call i32 @get()
%t1 = icmp slt i32 %t0, 2
br i1 %t1, label %block_a, label %bb
; Leaves the loop when a second @get result is below 99; otherwise branches
; back to the header.
bb:
%t2 = call i32 @get()
%t3 = icmp slt i32 %t2, 99
br i1 %t3, label %exit, label %loop
; Side block inside the loop: calls @bar99 and returns to the header.
block_a:
call void @bar99()
br label %loop
; Loop exit.
exit:
call void @exit()
ret void
}
; Attribute group used by @slightly_more_involved_2; it carries both of the
; size-optimization attributes (minsize and optsize).
attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
; CHECK-LABEL: slightly_more_involved_2_pgso:
; CHECK-NOT: jmp .LBB6_1
; CHECK: .LBB6_1:
; CHECK-NEXT: callq body
; @slightly_more_involved_2_pgso: the body matches @slightly_more_involved_2,
; but the function carries neither minsize nor optsize. Instead it is tagged
; with !prof !14, a function_entry_count of 0 defined at the end of the file.
; NOTE(review): presumably the zero entry count together with the module's
; profile summary makes the function count as cold, so it is laid out as when
; optimizing for size - confirm against the profile-guided size optimization
; logic.
define void @slightly_more_involved_2_pgso() norecurse nounwind readnone uwtable !prof !14 {
entry:
br label %loop
; Loop header: goes to %block_a when @get returns a value below 2 (signed),
; otherwise to %bb.
loop:
call void @body()
%t0 = call i32 @get()
%t1 = icmp slt i32 %t0, 2
br i1 %t1, label %block_a, label %bb
; Leaves the loop when a second @get result is below 99; otherwise branches
; back to the header.
bb:
%t2 = call i32 @get()
%t3 = icmp slt i32 %t2, 99
br i1 %t3, label %exit, label %loop
; Side block inside the loop: calls @bar99 and returns to the header.
block_a:
call void @bar99()
br label %loop
; Loop exit.
exit:
call void @exit()
ret void
}
; External functions called by the test functions above. They are only
; declared, never defined, in this file, and every one is nounwind. The calls
; to them show up as "callq <name>" in the expected output, which is how the
; expectations identify individual blocks.
declare void @bar99() nounwind
declare void @bar100() nounwind
declare void @bar101() nounwind
declare void @bar102() nounwind
declare void @body() nounwind
declare void @exit() nounwind
declare void @loop_header() nounwind
declare void @loop_latch() nounwind
; The only declaration with a result: its i32 return value feeds the icmp that
; drives each conditional branch in the tests.
declare i32 @get() nounwind
declare void @block_a_true_func() nounwind
declare void @block_a_false_func() nounwind
declare void @block_a_merge_func() nounwind
; Module-level profile summary, registered through the "ProfileSummary"
; module flag (!0).
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
; The summary is a list of key/value pairs (!2 through !9).
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
; The detailed summary holds three entries (!11, !12, !13).
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Function entry count of 0; attached as !prof !14 to
; @slightly_more_involved_2_pgso.
!14 = !{!"function_entry_count", i64 0}