# bank-conflict.mir
# RUN: llc -march=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s
# Test that the Post RA scheduler does not schedule back-to-back loads
# when there is another instruction to schedule. The scheduler avoids
# the back-to-back loads to reduce potential bank conflicts.
# CHECK: = L2_loadrigp
# CHECK: = A2_tfr
# CHECK: = L2_loadrigp
# CHECK: = L4_loadri_rr
# CHECK: = S2_tstbit_i
# CHECK: = L4_loadri_rr
--- |
; Aggregate types used by @f0. %s.0 is the element type of the [10 x %s.0]
; array that @f0 indexes; @f0 only addresses fields 12 and 13 of it, both of
; which are [24 x i32] arrays.
%s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
%s.1 = type { i32, i32 }
; Globals accessed by @f0: @g2 and @g3 are loaded in %b1, @g4 is loaded in
; %b2, @g0 is stored in %b3 and @g1 is stored in %b6.
@g0 = global i64 0
@g1 = global i64 0
@g2 = global i32 0
@g3 = global i32 0
@g4 = global i8 0
; Hexagon intrinsics called by @f0. Each declaration carries attribute
; group #0.
declare i32 @llvm.hexagon.S2.cl0(i32) #0
declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) #0
declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) #0
declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) #0
declare i64 @llvm.hexagon.A2.vaddws(i64, i64) #0
declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #0
; IR counterpart of the machine function f0 below. The MIR memory operands
; refer to values in this function by name (%ir.v14, %ir.v17), so the value
; names here must stay in sync with the machine function.
;
; Shape: an outer loop (%b1..%b5) that runs until its counter %v28 equals
; %a0, containing an inner loop (%b2..%b4) that runs until %v13 becomes 0.
define void @f0(i32 %a0) {
b0:
; Pointer to a [10 x %s.0] array formed from a constant integer address.
%v0 = bitcast [10 x %s.0]* inttoptr (i32 -121502345 to [10 x %s.0]*) to [10 x %s.0]*
br label %b1
b1: ; preds = %b5, %b0
; %v1 is the outer iteration counter, %v2 the current array element index.
%v1 = phi i32 [ 0, %b0 ], [ %v28, %b5 ]
%v2 = phi i32 [ 0, %b0 ], [ %v27, %b5 ]
; Initial value for the inner loop: @g2 AND @g3.
%v3 = load i32, i32* @g2, align 4
%v4 = load i32, i32* @g3, align 8
%v5 = and i32 %v4, %v3
; %v8 and %v9 point at fields 12 and 13 of element %v2.
%v6 = getelementptr [10 x %s.0], [10 x %s.0]* %v0, i32 0, i32 %v2
%v7 = bitcast %s.0* %v6 to %s.0*
%v8 = getelementptr %s.0, %s.0* %v7, i32 0, i32 12
%v9 = getelementptr %s.0, %s.0* %v7, i32 0, i32 13
br label %b2
b2: ; preds = %b4, %b1
; %v10 is the running i64 value carried around the inner loop; %v11 is the
; i32 value that is updated every iteration and tested in %b4.
%v10 = phi i64 [ %v24, %b4 ], [ 0, %b1 ]
%v11 = phi i32 [ %v13, %b4 ], [ %v5, %b1 ]
; %v12 (result of S2.cl0 on %v11) is used both as the second operand of
; S2.setbit.r and as the element index for the two loads below.
%v12 = tail call i32 @llvm.hexagon.S2.cl0(i32 %v11)
%v13 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %v11, i32 %v12)
; The loads through %v14 and %v17 read element %v12 of fields 12 and 13.
; They are the two loads tagged %ir.v14 / %ir.v17 in the machine function.
%v14 = getelementptr [24 x i32], [24 x i32]* %v8, i32 0, i32 %v12
%v15 = load i32, i32* %v14, align 4
%v16 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %v15, i32 %v15)
%v17 = getelementptr [24 x i32], [24 x i32]* %v9, i32 0, i32 %v12
%v18 = load i32, i32* %v17, align 4
%v19 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %v16, i32 %v18, i32 %v18)
; Take %b3 only when bit 0 of @g4 is clear.
%v20 = load i8, i8* @g4, align 1
%v21 = and i8 %v20, 1
%v22 = icmp eq i8 %v21, 0
br i1 %v22, label %b3, label %b4
b3: ; preds = %b2
; Combine the carried value with this iteration's result and publish it
; to @g0.
%v23 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %v10, i64 %v19)
store i64 %v23, i64* @g0, align 8
br label %b4
b4: ; preds = %b3, %b2
; Carried value is %v23 when %b3 ran, otherwise the unchanged %v10.
%v24 = phi i64 [ %v23, %b3 ], [ %v10, %b2 ]
; Leave the inner loop once %v13 is 0.
%v25 = icmp eq i32 %v13, 0
br i1 %v25, label %b5, label %b2
b5: ; preds = %b4
; Next element index is A4.modwrapu(%v2 + 1, 10); the outer counter is
; incremented and compared against %a0 to decide whether to exit.
%v26 = add i32 %v2, 1
%v27 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %v26, i32 10)
%v28 = add i32 %v1, 1
%v29 = icmp eq i32 %v28, %a0
br i1 %v29, label %b6, label %b1
b6: ; preds = %b5
; Store the last %v19 computed by the inner loop to @g1.
store i64 %v19, i64* @g1, align 8
ret void
}
; Attribute group referenced by the intrinsic declarations above.
attributes #0 = { nounwind readnone }
...
---
# Machine function for @f0; this is the input handed to the post-RA
# scheduler by the RUN line. Instruction order and register assignment in
# the body are the test input and must not be changed.
#
# Block layout:
#   bb.0  entry: sets $r3 = 0, $r2 = the constant base address, $r4 = 10,
#         then J2_loop0r on %bb.1 with $r0 (defines $lc0 and $sa0).
#   bb.1  outer-loop header (target of ENDLOOP0 in bb.3).
#   bb.2  inner loop; branches back to itself through J2_jumpf on $p1.
#   bb.3  increments $r3, applies A4_modwrapu with $r4, then ENDLOOP0.
#   bb.4  stores $d4 to @g1 and returns.
#
# Back-to-back loads that the CHECK lines expect to be separated:
#   * bb.1: the two L2_loadrigp loads (@g2, @g3) are adjacent and are
#     followed by "$r6 = A2_tfr $r5". Expected output order:
#     L2_loadrigp, A2_tfr, L2_loadrigp.
#   * bb.2: the two L4_loadri_rr loads (%ir.v14, %ir.v17) are adjacent.
#     Expected output order: L4_loadri_rr, S2_tstbit_i, L4_loadri_rr.
#
# Constants: 1824 in M2_mpysip matches the byte size of one %s.0 element,
# and 1152 / 1248 in the M2_accii instructions match the byte offsets of
# fields 12 and 13 of %s.0 (assuming 4-byte i32 and no padding).
name: f0
alignment: 16
tracksRegLiveness: true
registers:
liveins:
- { reg: '$r0', virtual-reg: '' }
fixedStack:
stack:
constants:
body: |
bb.0:
successors: %bb.1(0x80000000)
liveins: $r0:0x00000001
$r3 = A2_tfrsi 0
$r2 = A2_tfrsi -121502345
$r4 = A2_tfrsi 10
J2_loop0r %bb.1, killed $r0, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
bb.1 (address-taken):
successors: %bb.2(0x80000000)
liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004
$r5 = M2_mpysip $r3, 1824
$r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load 4 from @g2)
$r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load 4 from @g3, align 8)
$r6 = A2_tfr $r5
$r7 = A2_and killed $r8, killed $r7
$r5 = M2_accii killed $r5, $r2, 1248
$r6 = M2_accii killed $r6, $r2, 1152
$d0 = A2_tfrpi 0
bb.2:
successors: %bb.3(0x04000000), %bb.2(0x7c000000)
liveins: $lc0:0x00000004, $r0:0x00000001, $r1:0x00000001, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r5:0x00000001, $r6:0x00000001, $r7:0x00000001, $sa0:0x00000004
$r8 = S2_cl0 $r7
$r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load 1 from @g4)
$r7 = S2_setbit_r killed $r7, $r8
$r9 = L4_loadri_rr $r6, $r8, 2 :: (load 4 from %ir.v14)
$r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load 4 from %ir.v17)
$d4 = M2_vmpy2s_s0 killed $r9, $r9, implicit-def dead $usr_ovf
$p0 = S2_tstbit_i killed $r12, 0
$d4 = M2_vmac2s_s0 killed $d4, killed $r13, $r13, implicit-def dead $usr_ovf
$p1 = C2_cmpeqi $r7, 0
$d6 = A2_vaddws $d0, $d4, implicit-def dead $usr_ovf
$d0 = A2_tfrpt $p0, killed $d0, implicit $d0
S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store 8 into @g0)
$d0 = A2_tfrpf killed $p0, killed $d6, implicit killed $d0
J2_jumpf killed $p1, %bb.2, implicit-def dead $pc
bb.3:
successors: %bb.4(0x04000000), %bb.1(0x7c000000)
liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r8:0x00000001, $r9:0x00000001, $sa0:0x00000004
$r3 = A2_addi killed $r3, 1
$r3 = A4_modwrapu killed $r3, $r4
ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
bb.4:
liveins: $r8:0x00000001, $r9:0x00000001
S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store 8 into @g1)
PS_jmpret killed $r31, implicit-def dead $pc
...