SCFToGPUPass.cpp
2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
//===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
#include "../PassDetail.h"
#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/CommandLine.h"
using namespace mlir;
using namespace mlir::scf;
namespace {
// A pass that traverses top-level loops in the function and converts them to
// GPU launch operations. Nested launches are not allowed, so this does not
// walk the function recursively to avoid considering nested loops.
struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> {
ForLoopMapper() = default;
ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
this->numBlockDims = numBlockDims;
this->numThreadDims = numThreadDims;
}
void runOnFunction() override {
for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) {
if (auto forOp = dyn_cast<AffineForOp>(&op)) {
if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
numThreadDims)))
signalPassFailure();
}
}
}
};
struct ParallelLoopToGpuPass
: public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
void runOnOperation() override {
OwningRewritePatternList patterns;
populateParallelLoopToGPUPatterns(patterns, &getContext());
ConversionTarget target(getContext());
target.addLegalDialect<StandardOpsDialect>();
target.addLegalDialect<AffineDialect>();
target.addLegalDialect<gpu::GPUDialect>();
target.addLegalDialect<scf::SCFDialect>();
target.addIllegalOp<scf::ParallelOp>();
if (failed(applyPartialConversion(getOperation(), target, patterns)))
signalPassFailure();
}
};
} // namespace
std::unique_ptr<OperationPass<FuncOp>>
mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
}
std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineForToGPUPass() {
return std::make_unique<ForLoopMapper>();
}
std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
return std::make_unique<ParallelLoopToGpuPass>();
}