vector-to-loops.mlir
// RUN: mlir-opt %s -convert-vector-to-scf -split-input-file | FileCheck %s
// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -split-input-file | FileCheck %s --check-prefix=FULL-UNROLL
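//
// The first RUN line exercises the default -convert-vector-to-scf lowering and
// is matched under the default check prefix. The second RUN line enables
// full-unroll=true; for the 2-D transfers below this unrolls the leading
// vector dimension into a sequence of 1-D transfers instead of looping over
// it, as the checks under the FULL-UNROLL prefix expect.
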
// CHECK-LABEL: func @materialize_read_1d() {
func @materialize_read_1d() {
  %f0 = constant 0.0: f32
  %A = alloc () : memref<7x42xf32>
  affine.for %i0 = 0 to 7 step 4 {
    affine.for %i1 = 0 to 42 step 4 {
      %f1 = vector.transfer_read %A[%i0, %i1], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
      %ip1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i1)
      %f2 = vector.transfer_read %A[%i0, %ip1], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
      %ip2 = affine.apply affine_map<(d0) -> (d0 + 2)> (%i1)
      %f3 = vector.transfer_read %A[%i0, %ip2], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
      %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1)
      %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
// Both load indices must be clipped; otherwise accesses such as %i1 + 2 and %i1 + 3 would go out of bounds.
// CHECK: {{.*}} = select
// CHECK: %[[FILTERED1:.*]] = select
// CHECK: {{.*}} = select
// CHECK: %[[FILTERED2:.*]] = select
// CHECK-NEXT: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32>
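//
// For reference, a sketch (based on the fuller checks in @materialize_read
// below, not matched here) of the clipping each index goes through before the
// scalar load: it is clamped to [0, size - 1] with cmpi/select pairs, roughly
//   %last = affine.apply affine_map<()[s0] -> (s0 - 1)>()[%size]
//   %lt   = cmpi "slt", %idx, %last : index
//   %min  = select %lt, %idx, %last : index
//   %neg  = cmpi "slt", %idx, %c0 : index
//   %clip = select %neg, %c0, %min : index
// where %idx, %size, %last, %lt, %min, %neg and %clip are illustrative names,
// not values produced by this test.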
    }
  }
  return
}

// -----

// CHECK-LABEL: func @materialize_read_1d_partially_specialized
func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
  %f0 = constant 0.0: f32
  %A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
  affine.for %i0 = 0 to 7 {
    affine.for %i1 = 0 to %dyn1 {
      affine.for %i2 = 0 to %dyn2 {
        affine.for %i3 = 0 to 42 step 2 {
          affine.for %i4 = 0 to %dyn4 {
            %f1 = vector.transfer_read %A[%i0, %i1, %i2, %i3, %i4], %f0 {permutation_map = affine_map<(d0, d1, d2, d3, d4) -> (d3)>} : memref<7x?x?x42x?xf32>, vector<4xf32>
            %i3p1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i3)
            %f2 = vector.transfer_read %A[%i0, %i1, %i2, %i3p1, %i4], %f0 {permutation_map = affine_map<(d0, d1, d2, d3, d4) -> (d3)>} : memref<7x?x?x42x?xf32>, vector<4xf32>
          }
        }
      }
    }
  }
// CHECK: %[[tensor:[0-9]+]] = alloc
// CHECK-NOT: {{.*}} dim %[[tensor]], %c0
// CHECK-NOT: {{.*}} dim %[[tensor]], %c3
  return
}

// -----

// CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>
// CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
  %f0 = constant 0.0: f32
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
// CHECK-DAG: %[[C3:.*]] = constant 3 : index
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
// CHECK-DAG: %[[C5:.*]] = constant 5 : index
// CHECK: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
// CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
// CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
// CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
// CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: %[[L0:.*]] = select
//
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: %[[L1:.*]] = select
//
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: %[[L2:.*]] = select
//
// CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: %[[L3:.*]] = select
//
// CHECK-NEXT: {{.*}} = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref<?x?x?x?xf32>
// CHECK-NEXT: store {{.*}}, %[[ALLOC]][%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: dealloc %[[ALLOC]] : memref<5x4x3xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT:}
// Check that the I0 + I4 dimension (of size 3) is read via the first load index, load(L0, ...), and written via the last store index, store(..., I4).
// Check that the I3 + I6 dimension (of size 5) is read via the last load index, load(..., L3), and written via the first store index, store(I6, ...).
// The remaining memref dimensions are simply accessed with I1 and I2, respectively.
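//
// In other words, the permutation_map below,
//   affine_map<(d0, d1, d2, d3) -> (d3, 0, d0)>,
// maps memref dimension d3 to the outermost vector dimension (size 5) and d0
// to the innermost one (size 3), while the constant 0 result broadcasts along
// the middle vector dimension (size 4), which is why I5 never appears in the
// load indices.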
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
  affine.for %i0 = 0 to %M step 3 {
    affine.for %i1 = 0 to %N {
      affine.for %i2 = 0 to %O {
        affine.for %i3 = 0 to %P step 5 {
          %f = vector.transfer_read %A[%i0, %i1, %i2, %i3], %f0 {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, 0, d0)>} : memref<?x?x?x?xf32>, vector<5x4x3xf32>
        }
      }
    }
  }
  return
}

// -----

// CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>
// CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32>
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
// CHECK-DAG: %[[C3:.*]] = constant 3 : index
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
// CHECK-DAG: %[[C5:.*]] = constant 5 : index
// CHECK: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32>
// CHECK: store %{{.*}}, {{.*}} : memref<vector<5x4x3xf32>>
// CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
// CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
// CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
// CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: %[[S0:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//
// CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: %[[S1:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %{{.*}} : index
// CHECK-NEXT: {{.*}} = select {{.*}}, %[[I2]], {{.*}} : index
// CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %[[C0]] : index
// CHECK-NEXT: %[[S2:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//
// CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
// CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//
// CHECK-NEXT: {{.*}} = load {{.*}}[%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32>
// CHECK: store {{.*}}, {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref<?x?x?x?xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: dealloc {{.*}} : memref<5x4x3xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT:}
//
// Check that the I0 + I4 dimension (of size 3) is read via the last load index, load(..., I4), and written via the first store index, store(S0, ...).
// Check that the I1 + I5 dimension (of size 4) is read via the second load index, load(..., I5, ...), and written via the second store index, store(..., S1, ...).
// Check that the I3 + I6 dimension (of size 5) is read via the first load index, load(I6, ...), and written via the last store index, store(..., S3).
// The remaining memref dimension is simply accessed with I2.
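//
// In other words, the permutation_map below,
//   affine_map<(d0, d1, d2, d3) -> (d3, d1, d0)>,
// maps memref dimension d3 to the outermost vector dimension (size 5), d1 to
// the middle one (size 4), and d0 to the innermost one (size 3); d2 is not a
// vector dimension and is only addressed by the loop index I2.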
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
  %f1 = constant dense<1.000000e+00> : vector<5x4x3xf32>
  affine.for %i0 = 0 to %M step 3 {
    affine.for %i1 = 0 to %N step 4 {
      affine.for %i2 = 0 to %O {
        affine.for %i3 = 0 to %P step 5 {
          vector.transfer_write %f1, %A[%i0, %i1, %i2, %i3] {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d1, d0)>} : vector<5x4x3xf32>, memref<?x?x?x?xf32>
        }
      }
    }
  }
  return
}

// -----

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
// FULL-UNROLL-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
// FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
// FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
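//
// Progressive lowering of a 2-D transfer_read: the default path allocates a
// scratch memref<3xvector<15xf32>>, loops over the leading dimension with
// affine.for and, guarded by scf.if on the row index versus the memref size,
// either transfer_reads a 1-D row or stores the padding splat; the result is
// then reloaded as a single vector<3x15xf32> through vector.type_cast. Under
// full-unroll the three rows become scf.if regions that yield a
// vector<3x15xf32> built up with vector.insert.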
// CHECK-LABEL: transfer_read_progressive(
// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index
// FULL-UNROLL-LABEL: transfer_read_progressive(
// FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index
func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x15xf32> {
// CHECK: %[[cst:.*]] = constant 7.000000e+00 : f32
  %f7 = constant 7.0: f32
// CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
// CHECK-DAG: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref<?x?xf32>
// CHECK: affine.for %[[I:.*]] = 0 to 3 {
// CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
// CHECK: %[[cond1:.*]] = cmpi "slt", %[[add]], %[[dim]] : index
// CHECK: scf.if %[[cond1]] {
// CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] : memref<?x?xf32>, vector<15xf32>
// CHECK: store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
// CHECK: } else {
// CHECK: store %[[splat]], %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
// CHECK: }
// CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
// CHECK: %[[cst:.*]] = load %[[vmemref]][] : memref<vector<3x15xf32>>
// FULL-UNROLL: %[[pad:.*]] = constant 7.000000e+00 : f32
// FULL-UNROLL: %[[VEC0:.*]] = constant dense<7.000000e+00> : vector<3x15xf32>
// FULL-UNROLL: %[[C0:.*]] = constant 0 : index
// FULL-UNROLL: %[[SPLAT:.*]] = constant dense<7.000000e+00> : vector<15xf32>
// FULL-UNROLL: %[[DIM:.*]] = dim %[[A]], %[[C0]] : memref<?x?xf32>
// FULL-UNROLL: cmpi "slt", %[[base]], %[[DIM]] : index
// FULL-UNROLL: %[[VEC1:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
// FULL-UNROLL: vector.transfer_read %[[A]][%[[base]], %[[base]]], %[[pad]] : memref<?x?xf32>, vector<15xf32>
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC0]] [0] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: } else {
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC0]] [0] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: }
// FULL-UNROLL: affine.apply #[[$MAP1]]()[%[[base]]]
// FULL-UNROLL: cmpi "slt", %{{.*}}, %[[DIM]] : index
// FULL-UNROLL: %[[VEC2:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
// FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[pad]] : memref<?x?xf32>, vector<15xf32>
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC1]] [1] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: } else {
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC1]] [1] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: }
// FULL-UNROLL: affine.apply #[[$MAP2]]()[%[[base]]]
// FULL-UNROLL: cmpi "slt", %{{.*}}, %[[DIM]] : index
// FULL-UNROLL: %[[VEC3:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
// FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[pad]] : memref<?x?xf32>, vector<15xf32>
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC2]] [2] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: } else {
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC2]] [2] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: }
  %f = vector.transfer_read %A[%base, %base], %f7 :
    memref<?x?xf32>, vector<3x15xf32>
  return %f: vector<3x15xf32>
}

// -----

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
// FULL-UNROLL-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
// FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
// FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
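//
// Progressive lowering of a 2-D transfer_write: the default path spills %vec
// into a scratch memref<3xvector<15xf32>> through vector.type_cast, then
// loops over the leading dimension and, guarded by scf.if on the row index
// versus the memref size, reloads each 1-D row and transfer_writes it. Under
// full-unroll each row is vector.extract'ed and written by a 1-D
// transfer_write inside its own scf.if guard.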
// CHECK-LABEL: transfer_write_progressive(
// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index,
// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
// FULL-UNROLL-LABEL: transfer_write_progressive(
// FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
// CHECK: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
// CHECK: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref<?x?xf32>
// CHECK: affine.for %[[I:.*]] = 0 to 3 {
// CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
// CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index
// CHECK: scf.if %[[cmp]] {
// CHECK: %[[vec_1d:.*]] = load %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
// CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// CHECK: }
// FULL-UNROLL: %[[C0:.*]] = constant 0 : index
// FULL-UNROLL: %[[DIM:.*]] = dim %[[A]], %[[C0]] : memref<?x?xf32>
// FULL-UNROLL: %[[CMP0:.*]] = cmpi "slt", %[[base]], %[[DIM]] : index
// FULL-UNROLL: scf.if %[[CMP0]] {
// FULL-UNROLL: %[[V0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
// FULL-UNROLL: vector.transfer_write %[[V0]], %[[A]][%[[base]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: }
// FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
// FULL-UNROLL: %[[CMP1:.*]] = cmpi "slt", %[[I1]], %[[DIM]] : index
// FULL-UNROLL: scf.if %[[CMP1]] {
// FULL-UNROLL: %[[V1:.*]] = vector.extract %[[vec]][1] : vector<3x15xf32>
// FULL-UNROLL: vector.transfer_write %[[V1]], %[[A]][%[[I1]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: }
// FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
// FULL-UNROLL: %[[CMP2:.*]] = cmpi "slt", %[[I2]], %[[DIM]] : index
// FULL-UNROLL: scf.if %[[CMP2]] {
// FULL-UNROLL: %[[V2:.*]] = vector.extract %[[vec]][2] : vector<3x15xf32>
// FULL-UNROLL: vector.transfer_write %[[V2]], %[[A]][%[[I2]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: }
  vector.transfer_write %vec, %A[%base, %base] :
    vector<3x15xf32>, memref<?x?xf32>
  return
}

// -----

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
// FULL-UNROLL-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
// FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
// FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
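//
// Same as @transfer_write_progressive, but the transfer_write carries
// masked = [false, false], i.e. the access is known to be in bounds, so the
// default lowering emits no scf.if guard around the per-row writes and the
// unrolled form is a plain sequence of vector.extract + 1-D transfer_write.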
// CHECK-LABEL: transfer_write_progressive_not_masked(
// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index,
// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
// FULL-UNROLL-LABEL: transfer_write_progressive_not_masked(
// FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive_not_masked(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
// CHECK-NOT: scf.if
// CHECK-NEXT: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
// CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
// CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 {
// CHECK-NEXT: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
// CHECK-NEXT: %[[vec_1d:.*]] = load %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
// CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: %[[VEC0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
// FULL-UNROLL: vector.transfer_write %[[VEC0]], %[[A]][%[[base]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
// FULL-UNROLL: %[[VEC1:.*]] = vector.extract %[[vec]][1] : vector<3x15xf32>
// FULL-UNROLL: vector.transfer_write %[[VEC1]], %[[A]][%[[I1]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
// FULL-UNROLL: %[[VEC2:.*]] = vector.extract %[[vec]][2] : vector<3x15xf32>
// FULL-UNROLL: vector.transfer_write %[[VEC2]], %[[A]][%[[I2]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
  vector.transfer_write %vec, %A[%base, %base] {masked = [false, false]} :
    vector<3x15xf32>, memref<?x?xf32>
  return
}

// -----

// FULL-UNROLL-LABEL: transfer_read_simple
func @transfer_read_simple(%A : memref<2x2xf32>) -> vector<2x2xf32> {
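// Minimal static case: reading a full vector<2x2xf32> from a memref<2x2xf32>.
// The full-unroll checks below only pin down the two 1-D row reads and the
// row-by-row vector.insert into an all-zero initial vector<2x2xf32>.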
  %c0 = constant 0 : index
  %f0 = constant 0.0 : f32
// FULL-UNROLL-DAG: %[[VC0:.*]] = constant dense<0.000000e+00> : vector<2x2xf32>
// FULL-UNROLL-DAG: %[[C0:.*]] = constant 0 : index
// FULL-UNROLL-DAG: %[[C1:.*]] = constant 1 : index
// FULL-UNROLL: %[[V0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]]
// FULL-UNROLL: %[[RES0:.*]] = vector.insert %[[V0]], %[[VC0]] [0] : vector<2xf32> into vector<2x2xf32>
// FULL-UNROLL: %[[V1:.*]] = vector.transfer_read %{{.*}}[%[[C1]], %[[C0]]]
// FULL-UNROLL: %[[RES1:.*]] = vector.insert %[[V1]], %[[RES0]] [1] : vector<2xf32> into vector<2x2xf32>
  %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<2x2xf32>, vector<2x2xf32>
  return %0 : vector<2x2xf32>
}