InlineSizeEstimatorAnalysis.cpp
10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of a IR to native size model.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <deque>
using namespace llvm;
AnalysisKey InlineSizeEstimatorAnalysis::Key;
#define DEBUG_TYPE "inline-size-estimator"
#ifdef LLVM_HAVE_TF_API
cl::opt<std::string> TFIR2NativeModelPath(
"ml-inliner-ir2native-model", cl::Hidden,
cl::desc("Path to saved model evaluating native size from IR."));
namespace {
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}
class IRToNativeSizeLearning {
public:
enum class NamedFeatureIndex : size_t {
InitialSize,
Blocks,
Calls,
IsLocal,
IsLinkOnceODR,
IsLinkOnce,
Loops,
MaxLoopDepth,
MaxDomTreeLevel,
NumNamedFeatures
};
static const size_t NumNamedFeatures =
static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
struct FunctionFeatures {
static const size_t FeatureCount;
std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
std::vector<int32_t> InstructionHistogram;
std::vector<int32_t> InstructionPairHistogram;
void fillTensor(int32_t *Ptr) const;
int32_t &operator[](NamedFeatureIndex Pos) {
return NamedFeatures[static_cast<size_t>(Pos)];
}
};
IRToNativeSizeLearning() = default;
static FunctionFeatures getFunctionFeatures(Function &F,
FunctionAnalysisManager &FAM);
};
// This is a point in time - we determined including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
// The array is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand - extracted.
//
// This array must be sorted.
static const std::array<std::pair<size_t, size_t>, 137>
ImportantInstructionSuccessions{
{{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
{1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
{1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
{1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
{2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
{2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
{13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
{28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
{31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
{32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
{32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
{33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
{34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
{39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
{49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
{49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
{53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
{56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
{56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
{64, 1}, {64, 64}, {65, 1}, {65, 65}}};
// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter 2, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always 0 feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
IRToNativeSizeLearning::NumNamedFeatures;
size_t getSize(Function &F, TargetTransformInfo &TTI) {
size_t Ret = 0;
for (const auto &BB : F)
for (const auto &I : BB)
Ret += TTI.getInstructionCost(
&I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
return Ret;
}
size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
return getSize(F, TTI);
}
unsigned getMaxDominatorTreeDepth(const Function &F,
const DominatorTree &Tree) {
unsigned Ret = 0;
for (const auto &BB : F)
if (const auto *TN = Tree.getNode(&BB))
Ret = std::max(Ret, TN->getLevel());
return Ret;
}
} // namespace
IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
FunctionAnalysisManager &FAM) {
assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
"expected function features are sorted");
auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
FunctionFeatures FF;
size_t InstrCount = getMaxInstructionID() + 1;
FF.InstructionHistogram.resize(InstrCount);
FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
int StartID = 0;
int LastID = StartID;
auto getPairIndex = [](size_t a, size_t b) {
auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
if (I == ImportantInstructionSuccessions.end())
return -1;
return static_cast<int>(
std::distance(ImportantInstructionSuccessions.begin(), I));
};
// We don't want debug calls, because they'd just add noise.
for (const auto &BB : F) {
for (const auto &I : BB.instructionsWithoutDebug()) {
auto ID = I.getOpcode();
++FF.InstructionHistogram[ID];
int PairIndex = getPairIndex(LastID, ID);
if (PairIndex >= 0)
++FF.InstructionPairHistogram[PairIndex];
LastID = ID;
if (isa<CallBase>(I))
++FF[NamedFeatureIndex::Calls];
}
}
FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
FF[NamedFeatureIndex::Blocks] =
std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
auto &LI = FAM.getResult<LoopAnalysis>(F);
FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
for (auto &L : LI)
FF[NamedFeatureIndex::MaxLoopDepth] =
std::max(FF[NamedFeatureIndex::MaxLoopDepth],
static_cast<int32_t>(L->getLoopDepth()));
FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
return FF;
}
void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
Ptr += NamedFeatures.size();
std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
Ptr += InstructionHistogram.size();
std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
Ptr);
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
return !TFIR2NativeModelPath.empty();
}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
if (!isEvaluatorRequested()) {
return;
}
std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
"serving_default_input_1",
{1, static_cast<int64_t>(
IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
std::vector<TensorSpec> OutputSpecs{
TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
Evaluator = std::make_unique<TFModelEvaluator>(
TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
if (!Evaluator || !Evaluator->isValid()) {
Evaluator.reset();
return;
}
}
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
FunctionAnalysisManager &FAM) {
if (!Evaluator)
return None;
auto Features = IRToNativeSizeLearning::getFunctionFeatures(
const_cast<Function &>(F), FAM);
int32_t *V = Evaluator->getInput<int32_t>(0);
Features.fillTensor(V);
auto ER = Evaluator->evaluate();
if (!ER)
return None;
float Ret = *ER->getTensorValue<float>(0);
if (Ret < 0.0)
Ret = 0.0;
return static_cast<size_t>(Ret);
}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
InlineSizeEstimatorAnalysis &&Other)
: Evaluator(std::move(Other.Evaluator)) {}
#else
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
FunctionAnalysisManager &FAM) {
return None;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif
PreservedAnalyses
InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
<< ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
return PreservedAnalyses::all();
}