Promotion.cpp
17.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
//===- Promotion.cpp - Implementation of linalg Promotion -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the linalg dialect Promotion pass.
//
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/FoldUtils.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/CommandLine.h"
using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;
using namespace mlir::scf;
using llvm::MapVector;
using folded_affine_min = FoldedValueBuilder<AffineMinOp>;
using folded_linalg_range = FoldedValueBuilder<linalg::RangeOp>;
using folded_std_dim = FoldedValueBuilder<DimOp>;
using folded_std_subview = FoldedValueBuilder<SubViewOp>;
using folded_std_view = FoldedValueBuilder<ViewOp>;
#define DEBUG_TYPE "linalg-promotion"
/// Tries to replace `size` by a constant bound read off its defining op:
///   - for an AffineMinOp, the smallest constant among the results of its
///     affine map;
///   - for a ConstantOp of index type, the constant it holds.
/// When such a bound is found and is non-negative, a new ConstantIndexOp
/// holding it is created at `loc` and returned. In every other case `size` is
/// returned unchanged.
static Value extractSmallestConstantBoundingSize(OpBuilder &b, Location loc,
                                                 Value size) {
  Optional<int64_t> smallestBound;
  if (auto minOp = size.getDefiningOp<AffineMinOp>()) {
    for (AffineExpr resultExpr : minOp.getAffineMap().getResults()) {
      auto constantExpr = resultExpr.dyn_cast<AffineConstantExpr>();
      // Non-constant results do not contribute to the bound.
      if (!constantExpr)
        continue;
      int64_t candidate = constantExpr.getValue();
      if (!smallestBound || candidate < *smallestBound)
        smallestBound = candidate;
    }
  } else if (auto constantOp = size.getDefiningOp<ConstantOp>()) {
    if (constantOp.getType().isa<IndexType>())
      smallestBound = constantOp.value().cast<IntegerAttr>().getInt();
  }

  // Without a usable (non-negative) constant, keep the original value.
  if (!smallestBound || *smallestBound < 0)
    return size;
  return b.create<ConstantIndexOp>(loc, *smallestBound);
}
/// Allocates a 1-D i8 buffer able to hold `size` elements of `elementType`.
/// The byte width of an element is its bit width divided by 8, rounded up.
/// When `dynamicBuffers` is false and `size` is defined by a ConstantIndexOp,
/// the buffer has a static shape; otherwise it is dynamically sized by
/// `width * size`. `options.useAlloca` selects between an alloca and an alloc,
/// and `alignment`, when set, is attached to the allocation as an i64
/// attribute.
static Value allocBuffer(const LinalgPromotionOptions &options,
                         Type elementType, Value size, bool dynamicBuffers,
                         OperationFolder *folder,
                         Optional<unsigned> alignment = None) {
  auto *context = size.getContext();
  auto bytesPerElement =
      llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8);

  IntegerAttr alignmentAttr;
  if (alignment.hasValue())
    alignmentAttr =
        IntegerAttr::get(IntegerType::get(64, context), alignment.getValue());

  // Static case: the element count is a known constant.
  if (!dynamicBuffers) {
    if (auto constantSize = size.getDefiningOp<ConstantIndexOp>()) {
      auto staticType =
          MemRefType::get(bytesPerElement * constantSize.getValue(),
                          IntegerType::get(8, context));
      if (options.useAlloca)
        return std_alloca(staticType, ValueRange{}, alignmentAttr).value;
      return std_alloc(staticType, ValueRange{}, alignmentAttr).value;
    }
  }

  // Dynamic case: the byte count is computed as an SSA value.
  Value numBytes = folded_std_muli(
      folder, folded_std_constant_index(folder, bytesPerElement), size);
  auto dynamicType = MemRefType::get(-1, IntegerType::get(8, context));
  if (options.useAlloca)
    return std_alloca(dynamicType, numBytes, alignmentAttr).value;
  return std_alloc(dynamicType, numBytes, alignmentAttr).value;
}
/// Allocation callback used when the user does not provide one. It allocates
/// a memref<..xi8> through allocBuffer, sized for the product of
/// `boundingSubViewSize`, and returns a view of that buffer whose element type
/// is the one of `subView` and whose (dynamic) sizes are
/// `boundingSubViewSize`. `builder` is not used by this implementation.
static Optional<Value> defaultAllocBufferCallBack(
    const LinalgPromotionOptions &options, OpBuilder &builder,
    SubViewOp subView, ArrayRef<Value> boundingSubViewSize, bool dynamicBuffers,
    Optional<unsigned> alignment, OperationFolder *folder) {
  ShapedType subViewType = subView.getType();
  int64_t rank = subViewType.getRank();
  (void)rank;
  assert(rank > 0 && boundingSubViewSize.size() == static_cast<size_t>(rank));

  auto zero = folded_std_constant_index(folder, 0);
  auto one = folded_std_constant_index(folder, 1);

  // Total number of elements: product of the bounding sizes.
  Value numElements = one;
  for (Value dimSize : boundingSubViewSize)
    numElements = folded_std_muli(folder, numElements, dimSize);

  Value rawBuffer = allocBuffer(options, subViewType.getElementType(),
                                numElements, dynamicBuffers, folder, alignment);

  // View the raw bytes as a memref of the subview element type, with one
  // dynamic dimension per bounding size and a zero byte shift.
  SmallVector<int64_t, 4> dynamicShape(boundingSubViewSize.size(),
                                       ShapedType::kDynamicSize);
  Value typedView = folded_std_view(
      folder, MemRefType::get(dynamicShape, subViewType.getElementType()),
      rawBuffer, zero, boundingSubViewSize);
  return typedView;
}
/// Deallocation callback used when the user does not provide one. It expects
/// `fullLocalView` to be what the default allocation callback returned, i.e.
/// the result of a ViewOp, and emits a dealloc of that view's source. No
/// dealloc is emitted when `options.useAlloca` is set. Always returns success.
static LogicalResult
defaultDeallocBufferCallBack(const LinalgPromotionOptions &options,
                             OpBuilder &b, Value fullLocalView) {
  auto definingView = fullLocalView.getDefiningOp<ViewOp>();
  assert(definingView && "expected full local view to be a ViewOp");
  // Buffers created with alloca get no explicit dealloc.
  if (options.useAlloca)
    return success();
  std_dealloc(definingView.source());
  return success();
}
namespace {
/// Helper struct that captures the information required to apply the
/// transformation on each op. This bridges the abstraction gap with the
/// user-facing API which exposes positional arguments to control which operands
/// are promoted.
struct LinalgOpInstancePromotionOptions {
/// Builds the per-op configuration for `op` from the user-facing `options`.
LinalgOpInstancePromotionOptions(LinalgOp op,
const LinalgPromotionOptions &options);
/// SubViews to promote, keyed by the index of the buffer operand they feed.
MapVector<unsigned, Value> subViews;
/// True if the full view should be used for the promoted buffer. Keyed by
/// the subview value.
DenseMap<Value, bool> useFullTileBuffers;
/// Callback functions for allocation and deallocation of promoted buffers, as
/// well as to copy the data into and out of these buffers.
AllocBufferCallbackFn allocationFn;
DeallocBufferCallbackFn deallocationFn;
CopyCallbackFn copyInFn;
CopyCallbackFn copyOutFn;
/// Allow the use of dynamically-sized buffers.
bool dynamicBuffers;
/// Alignment of promoted buffer.
Optional<unsigned> alignment;
};
/// Pair of views produced when promoting one subview.
struct PromotionInfo {
/// View returned by the allocation callback for the full tile sizes.
Value fullLocalView;
/// Subview of `fullLocalView` restricted to the sizes of the promoted subview.
Value partialLocalView;
};
} // namespace
/// Builds the per-op promotion configuration for `linalgOp` from the
/// user-facing `options`:
///   - records every buffer operand that is defined by a SubViewOp and is
///     selected by `options.operandsToPromote` (all operands when unset),
///     together with its "use full tile buffer" flag;
///   - installs the user callbacks, falling back to the default
///     allocation / deallocation / copy implementations of this file.
LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions(
    LinalgOp linalgOp, const LinalgPromotionOptions &options)
    : subViews(), dynamicBuffers(options.dynamicBuffers),
      alignment(options.alignment) {
  unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers();
  // Operands without an explicit flag get `useFullTileBuffersDefault`.
  auto vUseFullTileBuffers =
      options.useFullTileBuffers.getValueOr(llvm::SmallBitVector());
  vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault);

  for (unsigned idx = 0; idx != nBuffers; ++idx) {
    if (options.operandsToPromote && !options.operandsToPromote->count(idx))
      continue;
    // Only operands produced by a SubViewOp are promotable.
    auto *op = linalgOp.getBuffer(idx).getDefiningOp();
    if (auto sv = dyn_cast_or_null<SubViewOp>(op)) {
      subViews[idx] = sv;
      useFullTileBuffers[sv] = vUseFullTileBuffers[idx];
    }
  }

  // The default callbacks are stored in std::function members and are copied
  // along with this struct. They therefore capture everything they need by
  // value: capturing `options` or `this` by reference would leave a copy of
  // this struct pointing at the original object / the caller's options.
  const bool useDynamicBuffers = dynamicBuffers;
  const Optional<unsigned> bufferAlignment = alignment;
  allocationFn =
      (options.allocationFn
           ? *(options.allocationFn)
           : [options, useDynamicBuffers, bufferAlignment](
                 OpBuilder &builder, SubViewOp subViewOp,
                 ArrayRef<Value> boundingSubViewSize,
                 OperationFolder *folder) -> Optional<Value> {
               return defaultAllocBufferCallBack(
                   options, builder, subViewOp, boundingSubViewSize,
                   useDynamicBuffers, bufferAlignment, folder);
             });
  deallocationFn =
      (options.deallocationFn
           ? *(options.deallocationFn)
           : [options](OpBuilder &b, Value buffer) {
               return defaultDeallocBufferCallBack(options, b, buffer);
             });

  // Default copy: a linalg.copy from `src` to `dst`. It needs no captured
  // state.
  auto defaultCopyCallBack = [](OpBuilder &builder, Value src,
                                Value dst) -> LogicalResult {
    linalg_copy(src, dst);
    return success();
  };
  copyInFn = (options.copyInFn ? *(options.copyInFn) : defaultCopyCallBack);
  copyOutFn = (options.copyOutFn ? *(options.copyOutFn) : defaultCopyCallBack);
}
/// Performs promotion of a `subView` into a local buffer of the size of the
/// *ranges* of the `subView`. This produces a buffer whose size may be bigger
/// than the actual size of the `subView` at the boundaries.
/// This is related to the full/partial tile problem.
///
/// Each range size is first tightened to a constant by
/// extractSmallestConstantBoundingSize when possible; the resulting sizes are
/// handed to `options.allocationFn`.
///
/// Returns a PromotionInfo containing a `fullLocalView` and a
/// `partialLocalView` such that:
///   * `fullLocalView` is the value returned by the allocation callback for
///     the full tile sizes.
///   * `partialLocalView` is a subview of `fullLocalView`, with zero offsets
///     and unit strides, whose sizes are the actual dimensions of `subView`,
///     thereby accounting for boundary effects.
/// Returns llvm::None when the allocation callback does not produce a value.
///
/// The point of the full tile buffer is that constant static tile sizes are
/// folded and result in a buffer type with statically known size and alignment
/// properties.
/// To account for general boundary effects, padding must be performed on the
/// boundary tiles. This function does not pad; for now the caller does it with
/// a `fill` op followed by a partial `copy` op.
static Optional<PromotionInfo>
promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, SubViewOp subView,
                          LinalgOpInstancePromotionOptions const &options,
                          OperationFolder *folder) {
  auto viewType = subView.getType();
  auto rank = viewType.getRank();
  SmallVector<Value, 4> fullSizes, partialSizes;
  fullSizes.reserve(rank);
  partialSizes.reserve(rank);
  for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) {
    auto rangeValue = en.value();
    // Try to extract a tight constant.
    LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n");
    Value size = extractSmallestConstantBoundingSize(b, loc, rangeValue.size);
    LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n");
    fullSizes.push_back(size);
    // The partial view keeps the actual (possibly smaller) dimension.
    partialSizes.push_back(folded_std_dim(folder, subView, en.index()));
  }

  // Allocate the promoted buffer through the configured callback, which is
  // either user-provided or the default implementation.
  Optional<Value> fullLocalView =
      options.allocationFn(b, subView, fullSizes, folder);
  if (!fullLocalView)
    return {};

  auto zero = folded_std_constant_index(folder, 0);
  auto one = folded_std_constant_index(folder, 1);
  SmallVector<Value, 4> zeros(fullSizes.size(), zero);
  SmallVector<Value, 4> ones(fullSizes.size(), one);
  auto partialLocalView =
      folded_std_subview(folder, *fullLocalView, zeros, partialSizes, ones);
  return PromotionInfo{*fullLocalView, partialLocalView};
}
/// Promotes every subview listed in `options.subViews` into a new local
/// buffer and copies the original data into it.
///
/// For each subview:
///   1. a promoted buffer is created with promoteSubviewAsNewBuffer;
///   2. if the full tile buffer is used for that subview, the full local view
///      is filled with zero (float or integer element types only);
/// then, once all buffers exist, `options.copyInFn` copies each subview into
/// its partial local view.
///
/// Returns the map from operand index to PromotionInfo, or llvm::None when
/// there is nothing to promote, when an allocation or copy-in callback fails,
/// or when a fill is required for an element type that is neither float nor
/// integer. Ops already emitted before a failure are not erased.
static Optional<MapVector<unsigned, PromotionInfo>>
promoteSubViews(OpBuilder &b, Location loc,
                LinalgOpInstancePromotionOptions options,
                OperationFolder *folder) {
  if (options.subViews.empty())
    return {};

  ScopedContext scope(b, loc);
  MapVector<unsigned, PromotionInfo> promotionInfoMap;

  for (auto v : options.subViews) {
    SubViewOp subView = cast<SubViewOp>(v.second.getDefiningOp());
    Optional<PromotionInfo> promotionInfo =
        promoteSubviewAsNewBuffer(b, loc, subView, options, folder);
    if (!promotionInfo)
      return {};
    promotionInfoMap[v.first] = *promotionInfo;

    // Only fill the buffer if the full local view is used
    if (!options.useFullTileBuffers[v.second])
      continue;
    Type elementType = subView.getType().getElementType();
    Value fillVal;
    if (auto t = elementType.dyn_cast<FloatType>())
      fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0));
    else if (auto t = elementType.dyn_cast<IntegerType>())
      fillVal = folded_std_constant_int(folder, 0, t);
    else
      // No zero constant can be built for this element type: report failure
      // rather than emitting a fill with a null value.
      return {};
    linalg_fill(promotionInfo->fullLocalView, fillVal);
  }

  // Copy data into the promoted buffers. Use callback if provided.
  for (auto v : options.subViews) {
    auto info = promotionInfoMap.find(v.first);
    if (info == promotionInfoMap.end())
      continue;
    if (failed(options.copyInFn(b, cast<SubViewOp>(v.second.getDefiningOp()),
                                info->second.partialLocalView)))
      return {};
  }
  return promotionInfoMap;
}
/// Promotes the subview operands of `op` selected in `options` and rewrites
/// `op` in place to use the promoted views.
///
/// Steps:
///   1. promote the selected subviews (allocation, optional fill, copy-in);
///   2. replace the corresponding operands of `op` by the full or partial
///      local view, depending on `options.useFullTileBuffers`;
///   3. after `op`, copy the partial local view of every promoted output back
///      into the original subview with `options.copyOutFn`;
///   4. release every promoted buffer with `options.deallocationFn`.
///
/// Returns `op` on success. Returns llvm::None when `op` is a ConvOp with
/// padding, when not all selected subviews could be promoted, or when a
/// copy-out or deallocation callback fails. Failures in steps 3 and 4 are
/// reported after `op` has already been modified.
static Optional<LinalgOp>
promoteSubViews(OpBuilder &b, LinalgOp op,
                LinalgOpInstancePromotionOptions options,
                OperationFolder *folder) {
  assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics");

  if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) {
    // TODO: add a level of indirection to linalg.generic.
    if (convOp.padding())
      return {};
  }

  // 1. Promote the specified views and use them in the new op.
  auto loc = op.getLoc();
  auto promotedBuffersAndViews = promoteSubViews(b, loc, options, folder);
  if (!promotedBuffersAndViews ||
      promotedBuffersAndViews->size() != options.subViews.size())
    return {};

  // 2. Append all other operands as they appear, this enforces that such
  // operands are not views. This is to support cases such as FillOp taking
  // extra scalars etc.  Keep a reference to output buffers;
  SmallVector<Value, 8> opViews;
  opViews.reserve(op.getNumInputsAndOutputs());
  SmallVector<std::pair<Value, Value>, 8> writebackViews;
  writebackViews.reserve(promotedBuffersAndViews->size());
  for (auto view : llvm::enumerate(op.getInputsAndOutputBuffers())) {
    if (options.subViews.count(view.index()) == 0) {
      opViews.push_back(view.value());
      continue;
    }
    // Every selected subview has an entry: the size check above guarantees it.
    const PromotionInfo &info = (*promotedBuffersAndViews)[view.index()];
    opViews.push_back(options.useFullTileBuffers[view.value()]
                          ? info.fullLocalView
                          : info.partialLocalView);
    // Operands past the inputs are outputs and need a write-back.
    if (view.index() >= op.getNumInputs())
      writebackViews.emplace_back(view.value(), info.partialLocalView);
  }
  op.getOperation()->setOperands(0, opViews.size(), opViews);

  OpBuilder::InsertionGuard guard(b);
  b.setInsertionPointAfter(op);
  ScopedContext scope(b, loc);
  // 3. Emit write-back for the promoted output views: copy the partial view.
  for (auto viewAndPartialLocalView : writebackViews) {
    if (failed(options.copyOutFn(b, viewAndPartialLocalView.second,
                                 viewAndPartialLocalView.first)))
      return {};
  }

  // 4. Dealloc all local buffers. Every buffer is attempted even if an earlier
  // deallocation failed; any failure is then reported to the caller instead of
  // being silently dropped.
  bool deallocFailed = false;
  for (const auto &pi : *promotedBuffersAndViews)
    if (failed(options.deallocationFn(b, pi.second.fullLocalView)))
      deallocFailed = true;
  if (deallocFailed)
    return {};
  return op;
}
/// Checks whether subview promotion can apply to `op`: `op` must be a LinalgOp
/// with buffer semantics, and at least one of its buffer operands must both be
/// defined by a SubViewOp and be selected by `options.operandsToPromote`
/// (every operand is selected when that option is unset).
LogicalResult
mlir::linalg::promoteSubviewsPrecondition(Operation *op,
                                          LinalgPromotionOptions options) {
  auto linalgOp = dyn_cast<LinalgOp>(op);
  if (!linalgOp)
    return failure();
  // Transformation applies to buffers only.
  if (!linalgOp.hasBufferSemantics())
    return failure();

  // Check that at least one of the requested operands is indeed a subview.
  for (auto indexedBuffer :
       llvm::enumerate(linalgOp.getInputsAndOutputBuffers())) {
    if (!isa_and_nonnull<SubViewOp>(indexedBuffer.value().getDefiningOp()))
      continue;
    bool isRequested = !options.operandsToPromote.hasValue() ||
                       options.operandsToPromote->count(indexedBuffer.index());
    if (isRequested)
      return success();
  }
  // TODO: Check all subviews requested are bound by a static constant.
  // TODO: Check that the total footprint fits within a given size.
  return failure();
}
/// Public entry point: promotes the subview operands of `linalgOp` selected by
/// `options`. The user-facing options are translated once into the per-op
/// configuration, which is then handed to the file-local implementation.
/// Returns the updated op, or llvm::None when promotion fails.
Optional<LinalgOp> mlir::linalg::promoteSubViews(OpBuilder &b,
                                                 LinalgOp linalgOp,
                                                 LinalgPromotionOptions options,
                                                 OperationFolder *folder) {
  // Build the per-op configuration a single time and keep it alive for the
  // whole call.
  LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options);
  return ::promoteSubViews(b, linalgOp, linalgOptions, folder);
}
namespace {
/// Function pass that walks every LinalgOp of the function and promotes its
/// subview operands when promoteSubviewsPrecondition succeeds. Only the
/// `dynamicBuffers` and `useAlloca` pass options are forwarded to the
/// promotion; the result of the promotion itself is not checked.
struct LinalgPromotionPass : public LinalgPromotionBase<LinalgPromotionPass> {
  LinalgPromotionPass() = default;

  /// Overrides the pass options with the given values.
  LinalgPromotionPass(bool dynamicBuffers, bool useAlloca) {
    this->dynamicBuffers = dynamicBuffers;
    this->useAlloca = useAlloca;
  }

  void runOnFunction() override {
    // A single folder is shared by all the ops promoted in this function.
    OperationFolder folder(&getContext());
    getFunction().walk([this, &folder](LinalgOp op) {
      LinalgPromotionOptions options;
      options.setDynamicBuffers(dynamicBuffers);
      options.setUseAlloca(useAlloca);
      // Skip ops that promotion does not apply to.
      if (failed(promoteSubviewsPrecondition(op, options)))
        return;
      LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n");
      // New ops are inserted relative to the op being promoted.
      OpBuilder b(op);
      promoteSubViews(b, op, options, &folder);
    });
  }
};
} // namespace
// TODO: support more transformation options in the pass.
/// Creates a Linalg promotion pass whose `dynamicBuffers` and `useAlloca`
/// options are set to the given values.
std::unique_ptr<OperationPass<FuncOp>>
mlir::createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca) {
return std::make_unique<LinalgPromotionPass>(dynamicBuffers, useAlloca);
}
/// Creates a default-constructed Linalg promotion pass.
std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgPromotionPass() {
return std::make_unique<LinalgPromotionPass>();
}