AMDGPUExportClustering.cpp
4.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a DAG scheduling mutation to cluster shader
/// exports.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUExportClustering.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
using namespace llvm;
namespace {
/// Scheduling DAG mutation that groups shader export instructions together.
/// The actual work is performed in apply().
class ExportClustering : public ScheduleDAGMutation {
public:
  ExportClustering() = default;

  /// Rewrites the edges of \p DAG so that exports form a single ordered chain.
  void apply(ScheduleDAGInstrs *DAG) override;
};
static bool isExport(const SUnit &SU) {
const MachineInstr *MI = SU.getInstr();
return MI->getOpcode() == AMDGPU::EXP ||
MI->getOpcode() == AMDGPU::EXP_DONE;
}
static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
const MachineInstr *MI = SU->getInstr();
int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
return Imm >= 12 && Imm <= 15;
}
static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
unsigned PosCount) {
if (!PosCount || PosCount == Chain.size())
return;
// Position exports should occur as soon as possible in the shader
// for optimal performance. This moves position exports before
// other exports while preserving the order within different export
// types (pos or other).
SmallVector<SUnit *, 8> Copy(Chain);
unsigned PosIdx = 0;
unsigned OtherIdx = PosCount;
for (SUnit *SU : Copy) {
if (isPositionExport(TII, SU))
Chain[PosIdx++] = SU;
else
Chain[OtherIdx++] = SU;
}
}
/// Links the exports in \p Exports into one ordered cluster inside \p DAG.
///
/// For every consecutive pair (A, B) in \p Exports this adds a Barrier edge
/// and a Cluster edge making B depend on A. In addition, every non-weak
/// predecessor of B that is not itself an export becomes an Artificial
/// predecessor of the first export, so that no other computation is scheduled
/// in the middle of the chain.
///
/// A list with fewer than two exports has nothing to link and is ignored.
static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
  // Guard against empty input: front() is invalid on an empty list and
  // size() - 1 would wrap around for an unsigned zero.
  if (Exports.size() < 2)
    return;

  SUnit *ChainHead = Exports.front();

  // Now construct the cluster from the chain by adding new edges.
  for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
    SUnit *SUa = Exports[Idx];
    SUnit *SUb = Exports[Idx + 1];

    // Copy all dependencies to the head of the chain to avoid any
    // computation being inserted into the chain.
    for (const SDep &Pred : SUb->Preds) {
      SUnit *PredSU = Pred.getSUnit();
      if (!isExport(*PredSU) && !Pred.isWeak())
        DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
    }

    // New barrier edge ordering the two exports.
    DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
    // Also add a cluster edge between them.
    DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
  }
}
/// Removes every barrier edge that makes \p SU depend on an export.
///
/// When \p SU is not an export itself, each removed edge is replaced by
/// barrier edges to the non-export barrier predecessors of the export it
/// pointed to, so the ordering those predecessors imposed is kept. All edge
/// changes are collected first and applied after the scan of SU.Preds.
static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
  SmallVector<SDep, 2> ToAdd, ToRemove;

  for (const SDep &Edge : SU.Preds) {
    SUnit *Producer = Edge.getSUnit();
    // Only barrier edges coming from an export are of interest.
    if (!Edge.isBarrier() || !isExport(*Producer))
      continue;

    ToRemove.push_back(Edge);

    // Between two exports the edge is simply dropped. Otherwise the barrier
    // dependencies of the export are inherited to maintain order.
    if (!isExport(SU)) {
      for (const SDep &Inherited : Producer->Preds) {
        SUnit *Ancestor = Inherited.getSUnit();
        if (!Inherited.isBarrier() || isExport(*Ancestor))
          continue;
        ToAdd.push_back(SDep(Ancestor, SDep::Barrier));
      }
    }
  }

  for (const SDep &Edge : ToRemove)
    SU.removePred(Edge);
  for (const SDep &Edge : ToAdd)
    DAG->addEdge(&SU, Edge);
}
/// Collects all exports in \p DAG, detaches them from barrier edges, and, if
/// more than one export was found, orders and clusters them.
void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
  const auto *TII = static_cast<const SIInstrInfo *>(DAG->TII);

  SmallVector<SUnit *, 8> Exports;
  unsigned NumPositionExports = 0;

  // Walk the DAG gathering exports and removing the barrier edges that create
  // dependencies on exports. Freeing exports of successor edges allows more
  // scheduling freedom, and nothing should be order dependent on exports.
  // Edges ordering the exports themselves are added afterwards.
  for (SUnit &Node : DAG->SUnits) {
    if (isExport(Node)) {
      if (isPositionExport(TII, &Node))
        ++NumPositionExports;
      Exports.push_back(&Node);

      removeExportDependencies(DAG, Node);

      // Iterate over a copy of the successor list rather than Node.Succs
      // itself while edges are being removed from the successors.
      const SmallVector<SDep, 4> Successors(Node.Succs);
      for (const SDep &Out : Successors)
        removeExportDependencies(DAG, *Out.getSUnit());
    }
  }

  // Clustering only makes sense when there are multiple exports.
  if (Exports.size() <= 1)
    return;

  sortChain(TII, Exports, NumPositionExports);
  buildCluster(Exports, DAG);
}
} // end namespace
namespace llvm {
/// Creates a new export clustering mutation and hands ownership to the caller.
std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
  std::unique_ptr<ScheduleDAGMutation> Mutation =
      std::make_unique<ExportClustering>();
  return Mutation;
}
} // end namespace llvm