; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; FIXME: -verify-machineinstrs currently fails on ppc64 (mismatched register/instruction).
; This is already checked for in Atomics-64.ll
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
; In this file, we check that atomic loads and stores can make use of the
; indexed (X-form) versions of the load/store instructions.
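;
; Every test indexes element 90000 of the array, so the byte offset is
; 90000 * sizeof(element): 90000 (0x15F90) for i8, 180000 for i16, 360000 for
; i32 and 720000 for i64. None of these fit in the signed 16-bit displacement
; of the D-form memory instructions, so the offset is materialized into a
; register with lis (high 16 bits) / ori (low 16 bits), e.g.
; 90000 = 1 * 65536 + 24464, and the X-form instruction (lbzx, lhzx, lwzx,
; ldx, stbx, sthx, stwx, stdx) adds that register to the base pointer in r3.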
; Indexed versions of loads
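;
; The memory ordering is enforced around the indexed load itself: a seq_cst
; load is preceded by a full barrier (sync) and followed by an acquire fence,
; while an acquire load needs only the trailing fence. On PPC32 the trailing
; fence is lwsync. On PPC64 it is the cmpd/bne-/isync idiom: comparing the
; loaded value against itself creates a control dependency on the load, and
; the (never-taken) branch plus isync keeps later loads from being satisfied
; before it, giving acquire semantics.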
define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) {
; PPC32-LABEL: load_x_i8_seq_cst:
; PPC32: # %bb.0:
; PPC32-NEXT: lis r4, 1
; PPC32-NEXT: sync
; PPC32-NEXT: ori r4, r4, 24464
; PPC32-NEXT: lbzx r3, r3, r4
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_x_i8_seq_cst:
; PPC64: # %bb.0:
; PPC64-NEXT: lis r4, 1
; PPC64-NEXT: sync
; PPC64-NEXT: ori r4, r4, 24464
; PPC64-NEXT: lbzx r3, r3, r4
; PPC64-NEXT: cmpd cr7, r3, r3
; PPC64-NEXT: bne- cr7, .+4
; PPC64-NEXT: isync
; PPC64-NEXT: blr
%ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
%val = load atomic i8, i8* %ptr seq_cst, align 1
ret i8 %val
}
define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
; PPC32-LABEL: load_x_i16_acquire:
; PPC32: # %bb.0:
; PPC32-NEXT: lis r4, 2
; PPC32-NEXT: ori r4, r4, 48928
; PPC32-NEXT: lhzx r3, r3, r4
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_x_i16_acquire:
; PPC64: # %bb.0:
; PPC64-NEXT: lis r4, 2
; PPC64-NEXT: ori r4, r4, 48928
; PPC64-NEXT: lhzx r3, r3, r4
; PPC64-NEXT: cmpd cr7, r3, r3
; PPC64-NEXT: bne- cr7, .+4
; PPC64-NEXT: isync
; PPC64-NEXT: blr
%ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
%val = load atomic i16, i16* %ptr acquire, align 2
ret i16 %val
}
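
; A monotonic load needs no fences at all, so both targets emit just the
; address computation and a plain lwzx; the output is identical on PPC32 and
; PPC64, which is why the shared CHECK prefix covers both below.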
define i32 @load_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: load_x_i32_monotonic:
; CHECK: # %bb.0:
; CHECK-NEXT: lis r4, 5
; CHECK-NEXT: ori r4, r4, 32320
; CHECK-NEXT: lwzx r3, r3, r4
; CHECK-NEXT: blr
%ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000
%val = load atomic i32, i32* %ptr monotonic, align 4
ret i32 %val
}
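
; PPC32 has no native 64-bit atomic load, so the unordered i64 load is
; lowered to a call to __atomic_load_8. The offset is still folded into the
; pointer argument (addis 11 / addi -896 adds 11 * 65536 - 896 = 720000), and
; r4 = 0 should be the memory-order argument (relaxed). PPC64 can use a
; plain ldx.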
define i64 @load_x_i64_unordered([100000 x i64]* %mem) {
; PPC32-LABEL: load_x_i64_unordered:
; PPC32: # %bb.0:
; PPC32-NEXT: mflr r0
; PPC32-NEXT: stw r0, 4(r1)
; PPC32-NEXT: stwu r1, -16(r1)
; PPC32-NEXT: .cfi_def_cfa_offset 16
; PPC32-NEXT: .cfi_offset lr, 4
; PPC32-NEXT: addi r3, r3, -896
; PPC32-NEXT: addis r3, r3, 11
; PPC32-NEXT: li r4, 0
; PPC32-NEXT: bl __atomic_load_8
; PPC32-NEXT: lwz r0, 20(r1)
; PPC32-NEXT: addi r1, r1, 16
; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_x_i64_unordered:
; PPC64: # %bb.0:
; PPC64-NEXT: lis r4, 10
; PPC64-NEXT: ori r4, r4, 64640
; PPC64-NEXT: ldx r3, r3, r4
; PPC64-NEXT: blr
%ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000
%val = load atomic i64, i64* %ptr unordered, align 8
ret i64 %val
}
; Indexed versions of stores
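;
; For stores the fence comes before the indexed store: sync for seq_cst,
; lwsync for release, and nothing for monotonic or unordered. The store
; sequences are the same on both targets, so the shared CHECK prefix is used.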
define void @store_x_i8_seq_cst([100000 x i8]* %mem) {
; CHECK-LABEL: store_x_i8_seq_cst:
; CHECK: # %bb.0:
; CHECK-NEXT: lis r4, 1
; CHECK-NEXT: ori r4, r4, 24464
; CHECK-NEXT: li r5, 42
; CHECK-NEXT: sync
; CHECK-NEXT: stbx r5, r3, r4
; CHECK-NEXT: blr
%ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
store atomic i8 42, i8* %ptr seq_cst, align 1
ret void
}
define void @store_x_i16_release([100000 x i16]* %mem) {
; CHECK-LABEL: store_x_i16_release:
; CHECK: # %bb.0:
; CHECK-NEXT: lis r4, 2
; CHECK-NEXT: ori r4, r4, 48928
; CHECK-NEXT: li r5, 42
; CHECK-NEXT: lwsync
; CHECK-NEXT: sthx r5, r3, r4
; CHECK-NEXT: blr
%ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
store atomic i16 42, i16* %ptr release, align 2
ret void
}
define void @store_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: store_x_i32_monotonic:
; CHECK: # %bb.0:
; CHECK-NEXT: lis r4, 5
; CHECK-NEXT: ori r4, r4, 32320
; CHECK-NEXT: li r5, 42
; CHECK-NEXT: stwx r5, r3, r4
; CHECK-NEXT: blr
%ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000
store atomic i32 42, i32* %ptr monotonic, align 4
ret void
}
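
; As with the i64 load, PPC32 lowers the unordered i64 store to a libcall,
; __atomic_store_8. The value 42 is passed as a register pair (r5 = high word
; 0, r6 = low word 42) and r7 = 0 is presumably the memory-order argument
; (relaxed); PPC64 can store it directly with stdx.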
define void @store_x_i64_unordered([100000 x i64]* %mem) {
; PPC32-LABEL: store_x_i64_unordered:
; PPC32: # %bb.0:
; PPC32-NEXT: mflr r0
; PPC32-NEXT: stw r0, 4(r1)
; PPC32-NEXT: stwu r1, -16(r1)
; PPC32-NEXT: .cfi_def_cfa_offset 16
; PPC32-NEXT: .cfi_offset lr, 4
; PPC32-NEXT: addi r3, r3, -896
; PPC32-NEXT: addis r3, r3, 11
; PPC32-NEXT: li r5, 0
; PPC32-NEXT: li r6, 42
; PPC32-NEXT: li r7, 0
; PPC32-NEXT: bl __atomic_store_8
; PPC32-NEXT: lwz r0, 20(r1)
; PPC32-NEXT: addi r1, r1, 16
; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
; PPC64-LABEL: store_x_i64_unordered:
; PPC64: # %bb.0:
; PPC64-NEXT: lis r4, 10
; PPC64-NEXT: ori r4, r4, 64640
; PPC64-NEXT: li r5, 42
; PPC64-NEXT: stdx r5, r3, r4
; PPC64-NEXT: blr
%ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000
store atomic i64 42, i64* %ptr unordered, align 8
ret void
}