; pr47373.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \
; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s
; Global pointer-to-float, initialised to null. @d loads it once on entry and
; uses the loaded pointer as both the dividend source and the store destination.
@a = local_unnamed_addr global float* null, align 8
; @d: element-wise in-place division of two float arrays.
;
; The function loads the pointer stored in @a, calls @c to obtain a signed
; 32-bit element count and @b to obtain a second float pointer, and then
; computes  a[i] = a[i] / b[i]  for i in [0, count), with the fast-math flags
; reassoc/nsz/arcp/afn on every fdiv.
;
; The IR is already in vectorized form: a <4 x float> main loop guarded by a
; minimum-iteration test and a runtime overlap test, followed by a scalar loop
; that handles the remainder (or the whole range when the vector loop is
; skipped). All vector loads and stores are only 4-byte aligned.
;
; The expected assembly below keeps the vector loads (lvx/lvsl/vperm) but
; performs the division as four scalar fdivs routed through stack slots.
; NOTE(review): the file name suggests this is the regression test for LLVM
; bug 47373 - confirm against the bug tracker before changing the expectations.
; Function Attrs: nounwind
define void @d() local_unnamed_addr #0 {
; CHECK-LABEL: d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -208(r1)
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: std r30, 192(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r29, 0(r3)
; CHECK-NEXT: bl c
; CHECK-NEXT: nop
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: bl b
; CHECK-NEXT: nop
; CHECK-NEXT: cmpwi r30, 1
; CHECK-NEXT: blt cr0, .LBB0_9
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: cmplwi r30, 4
; CHECK-NEXT: clrldi r4, r30, 32
; CHECK-NEXT: li r5, 0
; CHECK-NEXT: blt cr0, .LBB0_7
; CHECK-NEXT: # %bb.2: # %vector.memcheck
; CHECK-NEXT: rldic r6, r30, 2, 30
; CHECK-NEXT: add r7, r3, r6
; CHECK-NEXT: cmpld r29, r7
; CHECK-NEXT: add r6, r29, r6
; CHECK-NEXT: bc 4, lt, .LBB0_4
; CHECK-NEXT: # %bb.3: # %vector.memcheck
; CHECK-NEXT: cmpld r3, r6
; CHECK-NEXT: bc 12, lt, .LBB0_7
; CHECK-NEXT: .LBB0_4: # %vector.ph
; CHECK-NEXT: rlwinm r5, r4, 0, 0, 29
; CHECK-NEXT: li r7, 15
; CHECK-NEXT: addi r6, r5, -4
; CHECK-NEXT: addi r8, r1, 144
; CHECK-NEXT: rldicl r6, r6, 62, 2
; CHECK-NEXT: addi r9, r1, 128
; CHECK-NEXT: addi r6, r6, 1
; CHECK-NEXT: addi r10, r1, 160
; CHECK-NEXT: mtctr r6
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: addi r11, r1, 112
; CHECK-NEXT: .LBB0_5: # %vector.body
; CHECK-NEXT: #
; CHECK-NEXT: add r12, r3, r6
; CHECK-NEXT: lvx v3, r3, r6
; CHECK-NEXT: lvx v5, r12, r7
; CHECK-NEXT: add r12, r29, r6
; CHECK-NEXT: lvsl v2, r3, r6
; CHECK-NEXT: vperm v2, v3, v5, v2
; CHECK-NEXT: lvx v3, r29, r6
; CHECK-NEXT: lvx v5, r12, r7
; CHECK-NEXT: lvsl v4, r29, r6
; CHECK-NEXT: stvx v2, 0, r8
; CHECK-NEXT: vperm v2, v3, v5, v4
; CHECK-NEXT: stvx v2, 0, r9
; CHECK-NEXT: lfs f0, 156(r1)
; CHECK-NEXT: lfs f1, 140(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: lfs f1, 136(r1)
; CHECK-NEXT: stfs f0, 172(r1)
; CHECK-NEXT: lfs f0, 152(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: lfs f1, 132(r1)
; CHECK-NEXT: stfs f0, 168(r1)
; CHECK-NEXT: lfs f0, 148(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: lfs f1, 128(r1)
; CHECK-NEXT: stfs f0, 164(r1)
; CHECK-NEXT: lfs f0, 144(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: stfs f0, 160(r1)
; CHECK-NEXT: lvx v2, 0, r10
; CHECK-NEXT: stvx v2, 0, r11
; CHECK-NEXT: ld r0, 112(r1)
; CHECK-NEXT: stdx r0, r29, r6
; CHECK-NEXT: addi r6, r6, 16
; CHECK-NEXT: ld r0, 120(r1)
; CHECK-NEXT: std r0, 8(r12)
; CHECK-NEXT: bdnz .LBB0_5
; CHECK-NEXT: # %bb.6: # %middle.block
; CHECK-NEXT: cmpld r5, r4
; CHECK-NEXT: beq cr0, .LBB0_9
; CHECK-NEXT: .LBB0_7: # %for.body.preheader18
; CHECK-NEXT: sldi r6, r5, 2
; CHECK-NEXT: sub r5, r4, r5
; CHECK-NEXT: addi r6, r6, -4
; CHECK-NEXT: add r3, r3, r6
; CHECK-NEXT: add r4, r29, r6
; CHECK-NEXT: mtctr r5
; CHECK-NEXT: .LBB0_8: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lfsu f0, 4(r4)
; CHECK-NEXT: lfsu f1, 4(r3)
; CHECK-NEXT: fdivs f0, f0, f1
; CHECK-NEXT: stfs f0, 0(r4)
; CHECK-NEXT: bdnz .LBB0_8
; CHECK-NEXT: .LBB0_9: # %for.end
; CHECK-NEXT: ld r30, 192(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 208
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
; %0 is the pointer held in @a (dividend and destination array); it is loaded
; before either call. %call is the element count, %call1 the divisor array.
%0 = load float*, float** @a, align 8
%call = call signext i32 bitcast (i32 (...)* @c to i32 ()*)() #2
%call1 = call float* bitcast (float* (...)* @b to float* ()*)() #2
; Nothing to do when the count is zero or negative.
%cmp11 = icmp sgt i32 %call, 0
br i1 %cmp11, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
; The count is known positive here, so zero-extension gives the 64-bit trip
; count. Fewer than 4 elements cannot fill one vector: go straight to the
; scalar loop.
%wide.trip.count = zext i32 %call to i64
%min.iters.check = icmp ult i32 %call, 4
br i1 %min.iters.check, label %for.body.preheader18, label %vector.memcheck
vector.memcheck: ; preds = %for.body.preheader
; Runtime overlap test. %scevgep / %scevgep15 are the one-past-the-end
; pointers of the two arrays; the ranges conflict when each array starts
; before the other one ends, in which case the scalar loop is used instead.
%scevgep = getelementptr float, float* %0, i64 %wide.trip.count
%scevgep15 = getelementptr float, float* %call1, i64 %wide.trip.count
%bound0 = icmp ult float* %0, %scevgep15
%bound1 = icmp ult float* %call1, %scevgep
%found.conflict = and i1 %bound0, %bound1
br i1 %found.conflict, label %for.body.preheader18, label %vector.ph
vector.ph: ; preds = %vector.memcheck
; 4294967292 == 0xFFFFFFFC: clear the low two bits, i.e. round the trip count
; down to a multiple of the vector width (4).
%n.vec = and i64 %wide.trip.count, 4294967292
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
; Main vector loop: four elements per iteration, %index advances by 4.
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; %wide.load = four divisor elements from the array returned by @b.
%1 = getelementptr inbounds float, float* %call1, i64 %index
%2 = bitcast float* %1 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %2, align 4
; %wide.load17 = four dividend elements from the array loaded from @a.
%3 = getelementptr inbounds float, float* %0, i64 %index
%4 = bitcast float* %3 to <4 x float>*
%wide.load17 = load <4 x float>, <4 x float>* %4, align 4
; Vector division; the quotient is stored back over the dividend elements.
%5 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load
%6 = bitcast float* %3 to <4 x float>*
store <4 x float> %5, <4 x float>* %6, align 4
%index.next = add i64 %index, 4
%7 = icmp eq i64 %index.next, %n.vec
br i1 %7, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body
; If the trip count was already a multiple of 4 there is no remainder to
; process.
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
br i1 %cmp.n, label %for.end, label %for.body.preheader18
for.body.preheader18: ; preds = %middle.block, %vector.memcheck, %for.body.preheader
; Scalar loop start index: 0 when the vector loop was skipped, %n.vec when it
; ran and left a remainder.
%indvars.iv.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
br label %for.body
for.body: ; preds = %for.body.preheader18, %for.body
; Scalar loop: one element per iteration, same a[i] = a[i] / b[i] operation.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader18 ]
%arrayidx = getelementptr inbounds float, float* %call1, i64 %indvars.iv
%8 = load float, float* %arrayidx, align 4
%arrayidx3 = getelementptr inbounds float, float* %0, i64 %indvars.iv
%9 = load float, float* %arrayidx3, align 4
%div = fdiv reassoc nsz arcp afn float %9, %8
store float %div, float* %arrayidx3, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.end, label %for.body
for.end: ; preds = %for.body, %middle.block, %entry
ret void
}
; External callees of @d. Both are declared with a variadic "(...)" signature;
; the call sites in @d bitcast them to zero-argument function types.
; @c returns a sign-extended i32 (used by @d as the element count) and @b
; returns a float pointer (used by @d as the divisor array).
; NOTE(review): attribute groups #1 (used on these declarations) and #2 (used
; on the call sites in @d) have no definition in the visible part of this
; file; only #0 is defined below - confirm this is intentional.
declare signext i32 @c(...) local_unnamed_addr #1
declare float* @b(...) local_unnamed_addr #1
attributes #0 = { nounwind }