#include "AMDGPUIGroupLP.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
namespace {
// Command-line switch ("amdgpu-igrouplp") gating the IGroupLP mutation:
// createIGroupLPDAGMutation() only returns a mutation when this is set.
// Off by default.
static cl::opt<bool>
EnableIGroupLP("amdgpu-igrouplp",
cl::desc("Enable construction of Instruction Groups and "
"their ordering for scheduling"),
cl::init(false));
// Hidden option: optional cap on the size of the VMEM group built in
// IGroupLPDAGMutation::apply. Defaults to None, in which case
// SchedGroup::isFull() never reports the group as full.
static cl::opt<Optional<unsigned>>
VMEMGroupMaxSize("amdgpu-igrouplp-vmem-group-size", cl::init(None),
cl::Hidden,
cl::desc("The maximum number of instructions to include "
"in VMEM group."));
// Hidden option: optional cap on the size of the MFMA group built in
// IGroupLPDAGMutation::apply. Defaults to None, in which case
// SchedGroup::isFull() never reports the group as full.
static cl::opt<Optional<unsigned>>
MFMAGroupMaxSize("amdgpu-igrouplp-mfma-group-size", cl::init(None),
cl::Hidden,
cl::desc("The maximum number of instructions to include "
"in MFMA group."));
// Hidden option: optional cap on the size of the DS read group built in
// IGroupLPDAGMutation::apply. Defaults to None, in which case
// SchedGroup::isFull() never reports the group as full.
static cl::opt<Optional<unsigned>>
LDRGroupMaxSize("amdgpu-igrouplp-ldr-group-size", cl::init(None),
cl::Hidden,
cl::desc("The maximum number of instructions to include "
"in lds/gds read group."));
// Hidden option: optional cap on the size of the DS write group built in
// IGroupLPDAGMutation::apply. Defaults to None, in which case
// SchedGroup::isFull() never reports the group as full.
static cl::opt<Optional<unsigned>>
LDWGroupMaxSize("amdgpu-igrouplp-ldw-group-size", cl::init(None),
cl::Hidden,
cl::desc("The maximum number of instructions to include "
"in lds/gds write group."));
/// Signature of the membership predicates used by SchedGroup: given an
/// instruction and the target instruction info, decide whether the
/// instruction belongs to a group.
using CanAddMIFn =
    function_ref<bool(const MachineInstr &, const SIInstrInfo *)>;
class SchedGroup {
private:
const CanAddMIFn canAddMI;
Optional<unsigned> MaxSize;
SmallVector<SUnit *, 32> Collection;
ScheduleDAGInstrs *DAG;
void tryAddEdge(SUnit *A, SUnit *B) {
if (A != B && DAG->canAddEdge(B, A)) {
DAG->addEdge(B, SDep(A, SDep::Artificial));
LLVM_DEBUG(dbgs() << "Adding edge...\n"
<< "from: SU(" << A->NodeNum << ") " << *A->getInstr()
<< "to: SU(" << B->NodeNum << ") " << *B->getInstr());
}
}
public:
void link(SUnit &SU, bool MakePred = false) {
for (auto A : Collection) {
SUnit *B = &SU;
if (MakePred)
std::swap(A, B);
tryAddEdge(A, B);
}
}
void link(SUnit &SU, function_ref<bool(const SUnit *A, const SUnit *B)> P) {
for (auto A : Collection) {
SUnit *B = &SU;
if (P(A, B))
std::swap(A, B);
tryAddEdge(A, B);
}
}
void link(SchedGroup &OtherGroup) {
for (auto B : OtherGroup.Collection)
link(*B);
}
bool isFull() { return MaxSize && Collection.size() >= *MaxSize; }
bool canAddSU(SUnit &SU, const SIInstrInfo *TII) {
if (isFull())
return false;
MachineInstr &MI = *SU.getInstr();
if (MI.getOpcode() != TargetOpcode::BUNDLE)
return canAddMI(MI, TII);
const MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::instr_iterator B = MI.getIterator(), E = ++B;
while (E != MBB->end() && E->isBundledWithPred())
++E;
return std::all_of(
B, E, [this, TII](MachineInstr &MI) { return canAddMI(MI, TII); });
}
void add(SUnit &SU) { Collection.push_back(&SU); }
SchedGroup(CanAddMIFn canAddMI, Optional<unsigned> MaxSize,
ScheduleDAGInstrs *DAG)
: canAddMI(canAddMI), MaxSize(MaxSize), DAG(DAG) {}
};
// Membership predicate for the MFMA group: true when TII classifies MI as an
// MFMA instruction.
bool isMFMASGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
return TII->isMFMA(MI);
}
// Membership predicate for the VALU group: VALU instructions, excluding those
// that are MFMA (which have their own group).
bool isVALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  if (!TII->isVALU(MI))
    return false;
  return !TII->isMFMA(MI);
}
// Membership predicate for the SALU group: true when TII classifies MI as a
// SALU instruction.
bool isSALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
return TII->isSALU(MI);
}
// Membership predicate for the VMEM group: instructions TII classifies as
// VMEM, or as FLAT but not DS.
bool isVMEMSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  if (TII->isVMEM(MI))
    return true;
  return TII->isFLAT(MI) && !TII->isDS(MI);
}
bool isVMEMReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
return MI.mayLoad() &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
}
bool isVMEMWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
return MI.mayStore() &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
}
// Membership predicate for the DS write group: DS instructions that may store.
bool isDSWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  if (!MI.mayStore())
    return false;
  return TII->isDS(MI);
}
// Membership predicate for the DS read group: DS instructions that may load.
bool isDSReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  if (!MI.mayLoad())
    return false;
  return TII->isDS(MI);
}
/// DAG mutation that sorts SUnits into instruction groups (VMEM, DS read,
/// MFMA, DS write) and adds artificial edges so that members of an earlier
/// group are ordered ahead of members of every later group.
class IGroupLPDAGMutation : public ScheduleDAGMutation {
public:
  // Target instruction info; assigned at the start of every apply() call.
  // Initialised to null so the pointer is never indeterminate before then.
  const SIInstrInfo *TII = nullptr;

  // The DAG currently being mutated; assigned at the start of every apply()
  // call. Initialised to null for the same reason as TII.
  ScheduleDAGMI *DAG = nullptr;

  IGroupLPDAGMutation() = default;

  /// Build the instruction groups for \p DAGInstrs and link them in pipeline
  /// order.
  void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
/// DAG mutation that, for every SCHED_BARRIER in the DAG, adds artificial
/// edges between the barrier and the instruction groups selected by the
/// barrier's mask operand.
class SchedBarrierDAGMutation : public ScheduleDAGMutation {
private:
  // Target instruction info; assigned in apply(). Initialised to null so the
  // pointer is never indeterminate before the first apply() call.
  const SIInstrInfo *TII = nullptr;

  // The DAG currently being mutated; assigned in apply(). Initialised to null
  // for the same reason as TII.
  ScheduleDAGMI *DAG = nullptr;

  // Bits of the SCHED_BARRIER mask operand. As implemented by
  // getSchedGroupsFromMask(), a group is linked to the barrier only when
  // neither its own bit nor the bit of its covering category is set:
  //   ALU  covers VALU, SALU and MFMA;
  //   VMEM covers VMEM_READ and VMEM_WRITE;
  //   DS   covers DS_READ and DS_WRITE.
  enum class SchedBarrierMasks {
    NONE = 0u,
    ALU = 1u << 0,
    VALU = 1u << 1,
    SALU = 1u << 2,
    MFMA = 1u << 3,
    VMEM = 1u << 4,
    VMEM_READ = 1u << 5,
    VMEM_WRITE = 1u << 6,
    DS = 1u << 7,
    DS_READ = 1u << 8,
    DS_WRITE = 1u << 9,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ DS_WRITE)
  };

  // Lazily created groups, one per instruction class. A null pointer means
  // the group has not been built yet; getSchedGroupsFromMask() creates and
  // populates a group the first time it is needed.
  std::unique_ptr<SchedGroup> MFMASchedGroup;
  std::unique_ptr<SchedGroup> VALUSchedGroup;
  std::unique_ptr<SchedGroup> SALUSchedGroup;
  std::unique_ptr<SchedGroup> VMEMReadSchedGroup;
  std::unique_ptr<SchedGroup> VMEMWriteSchedGroup;
  std::unique_ptr<SchedGroup> DSWriteSchedGroup;
  std::unique_ptr<SchedGroup> DSReadSchedGroup;

  // Append to SchedGroups every group that Mask does not exempt from being
  // linked to a barrier, creating groups on demand.
  void getSchedGroupsFromMask(int32_t Mask,
                              SmallVectorImpl<SchedGroup *> &SchedGroups);

  // Replace the existing edges of the SCHED_BARRIER unit SU with edges to the
  // groups selected by its mask.
  void addSchedBarrierEdges(SUnit &SU);

  // Populate SG with every SUnit of the current DAG it accepts.
  void initSchedGroup(SchedGroup *SG);

  // Remove the predecessor and successor edges of the SCHED_BARRIER unit SU.
  void resetSchedBarrierEdges(SUnit &SU);

public:
  /// Process every SCHED_BARRIER found in \p DAGInstrs.
  void apply(ScheduleDAGInstrs *DAGInstrs) override;

  SchedBarrierDAGMutation() = default;
};
/// Sort the DAG's SUnits into the VMEM, DS read, MFMA and DS write groups and
/// add artificial edges so that every member of an earlier group is linked
/// ahead of every member of each later group.
///
/// TII and DAG are updated even when the function bails out early because
/// there is no scheduling model or the DAG has no SUnits.
void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);

  const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
  const bool NothingToDo = !TSchedModel || DAG->SUnits.empty();
  if (NothingToDo)
    return;

  LLVM_DEBUG(dbgs() << "Applying IGroupLPDAGMutation...\n");

  // The position of a group in this list is its position in the pipeline.
  SmallVector<SchedGroup, 4> OrderedGroups = {
      SchedGroup(isVMEMSGMember, VMEMGroupMaxSize, DAG),
      SchedGroup(isDSReadSGMember, LDRGroupMaxSize, DAG),
      SchedGroup(isMFMASGMember, MFMAGroupMaxSize, DAG),
      SchedGroup(isDSWriteSGMember, LDWGroupMaxSize, DAG)};

  // Offer every SUnit to every group; a unit joins each group that accepts it.
  for (SUnit &SU : DAG->SUnits) {
    LLVM_DEBUG(dbgs() << "Checking Node"; DAG->dumpNode(SU));
    for (SchedGroup &Group : OrderedGroups) {
      if (!Group.canAddSU(SU, TII))
        continue;
      Group.add(SU);
    }
  }

  // Link each group to all groups that come after it in the list.
  for (auto Earlier = OrderedGroups.begin(), End = OrderedGroups.end();
       Earlier != End; ++Earlier)
    for (auto Later = Earlier + 1; Later != End; ++Later)
      Earlier->link(*Later);
}
/// Add SCHED_BARRIER edges for every SCHED_BARRIER SUnit in \p DAGInstrs.
///
/// Does nothing beyond clearing cached state when there is no scheduling
/// model or the DAG has no SUnits.
void SchedBarrierDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  // The cached groups store SUnit pointers collected from the DAG they were
  // built on. This object can be applied again after the DAG's SUnits have
  // been rebuilt, so discard the caches up front; otherwise
  // getSchedGroupsFromMask() would reuse groups that still point at SUnits of
  // an earlier DAG.
  MFMASchedGroup.reset();
  VALUSchedGroup.reset();
  SALUSchedGroup.reset();
  VMEMReadSchedGroup.reset();
  VMEMWriteSchedGroup.reset();
  DSWriteSchedGroup.reset();
  DSReadSchedGroup.reset();

  const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
  if (!TSchedModel || DAGInstrs->SUnits.empty())
    return;

  LLVM_DEBUG(dbgs() << "Applying SchedBarrierDAGMutation...\n");

  const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);

  for (SUnit &SU : DAG->SUnits)
    if (SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER)
      addSchedBarrierEdges(SU);
}
/// Rebuild the edges of the SCHED_BARRIER unit \p SchedBarrier: drop its
/// current edges, then link it to every group selected by its mask operand.
/// Group members with a higher NodeNum than the barrier get an edge from the
/// barrier; all other members get an edge to the barrier.
void SchedBarrierDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
  MachineInstr &MI = *SchedBarrier.getInstr();
  assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);

  resetSchedBarrierEdges(SchedBarrier);

  // Operand 0 carries the mask that selects which groups are linked.
  const int32_t Mask = MI.getOperand(0).getImm();
  SmallVector<SchedGroup *, 4> SelectedGroups;
  getSchedGroupsFromMask(Mask, SelectedGroups);

  // SchedGroup::link calls this as (member, barrier); a true result reverses
  // the edge so that it runs from the barrier to the member.
  auto MemberFollowsBarrier = [](const SUnit *A, const SUnit *B) {
    return A->NodeNum > B->NodeNum;
  };
  // Wrap the lambda explicitly: passing it directly would be ambiguous
  // between the bool and the function_ref overloads of link().
  const function_ref<bool(const SUnit *A, const SUnit *B)> ReverseEdge(
      MemberFollowsBarrier);

  for (SchedGroup *Group : SelectedGroups)
    Group->link(SchedBarrier, ReverseEdge);
}
void SchedBarrierDAGMutation::getSchedGroupsFromMask(
int32_t Mask, SmallVectorImpl<SchedGroup *> &SchedGroups) {
SchedBarrierMasks SBMask = (SchedBarrierMasks)Mask;
if ((SBMask & SchedBarrierMasks::VALU) == SchedBarrierMasks::NONE &&
(SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
if (!VALUSchedGroup) {
VALUSchedGroup = std::make_unique<SchedGroup>(isVALUSGMember, None, DAG);
initSchedGroup(VALUSchedGroup.get());
}
SchedGroups.push_back(VALUSchedGroup.get());
}
if ((SBMask & SchedBarrierMasks::SALU) == SchedBarrierMasks::NONE &&
(SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
if (!SALUSchedGroup) {
SALUSchedGroup = std::make_unique<SchedGroup>(isSALUSGMember, None, DAG);
initSchedGroup(SALUSchedGroup.get());
}
SchedGroups.push_back(SALUSchedGroup.get());
}
if ((SBMask & SchedBarrierMasks::MFMA) == SchedBarrierMasks::NONE &&
(SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
if (!MFMASchedGroup) {
MFMASchedGroup = std::make_unique<SchedGroup>(isMFMASGMember, None, DAG);
initSchedGroup(MFMASchedGroup.get());
}
SchedGroups.push_back(MFMASchedGroup.get());
}
if ((SBMask & SchedBarrierMasks::VMEM_READ) == SchedBarrierMasks::NONE &&
(SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
if (!VMEMReadSchedGroup) {
VMEMReadSchedGroup =
std::make_unique<SchedGroup>(isVMEMReadSGMember, None, DAG);
initSchedGroup(VMEMReadSchedGroup.get());
}
SchedGroups.push_back(VMEMReadSchedGroup.get());
}
if ((SBMask & SchedBarrierMasks::VMEM_WRITE) == SchedBarrierMasks::NONE &&
(SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
if (!VMEMWriteSchedGroup) {
VMEMWriteSchedGroup =
std::make_unique<SchedGroup>(isVMEMWriteSGMember, None, DAG);
initSchedGroup(VMEMWriteSchedGroup.get());
}
SchedGroups.push_back(VMEMWriteSchedGroup.get());
}
if ((SBMask & SchedBarrierMasks::DS_READ) == SchedBarrierMasks::NONE &&
(SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
if (!DSReadSchedGroup) {
DSReadSchedGroup =
std::make_unique<SchedGroup>(isDSReadSGMember, None, DAG);
initSchedGroup(DSReadSchedGroup.get());
}
SchedGroups.push_back(DSReadSchedGroup.get());
}
if ((SBMask & SchedBarrierMasks::DS_WRITE) == SchedBarrierMasks::NONE &&
(SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
if (!DSWriteSchedGroup) {
DSWriteSchedGroup =
std::make_unique<SchedGroup>(isDSWriteSGMember, None, DAG);
initSchedGroup(DSWriteSchedGroup.get());
}
SchedGroups.push_back(DSWriteSchedGroup.get());
}
}
/// Fill \p SG with every SUnit of the current DAG that the group accepts.
/// \p SG must not be null.
void SchedBarrierDAGMutation::initSchedGroup(SchedGroup *SG) {
  assert(SG);
  for (SUnit &Candidate : DAG->SUnits) {
    if (!SG->canAddSU(Candidate, TII))
      continue;
    SG->add(Candidate);
  }
}
/// Remove every predecessor edge and every successor edge of the
/// SCHED_BARRIER unit \p SU.
///
/// SUnit::removePred erases entries from the predecessor list of the unit it
/// is called on and from the successor list of the other endpoint. Iterating
/// those lists directly while removing would skip edges and read stale
/// slots, so each list is copied first and the copy is iterated.
void SchedBarrierDAGMutation::resetSchedBarrierEdges(SUnit &SU) {
  assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER);

  // Incoming edges: remove each predecessor recorded in the snapshot.
  const SmallVector<SDep, 8> IncomingEdges(SU.Preds.begin(), SU.Preds.end());
  for (const SDep &Pred : IncomingEdges)
    SU.removePred(Pred);

  // Outgoing edges: they are removed through the successor, by dropping every
  // predecessor entry of the successor that refers back to SU.
  const SmallVector<SDep, 8> OutgoingEdges(SU.Succs.begin(), SU.Succs.end());
  for (const SDep &Succ : OutgoingEdges) {
    SUnit *SuccSU = Succ.getSUnit();

    SmallVector<SDep, 4> EdgesFromBarrier;
    for (const SDep &SuccPred : SuccSU->Preds)
      if (SuccPred.getSUnit() == &SU)
        EdgesFromBarrier.push_back(SuccPred);

    // removePred returns without effect for an edge that is already gone, so
    // a successor that appears more than once in the snapshot is harmless.
    for (const SDep &Edge : EdgesFromBarrier)
      SuccSU->removePred(Edge);
  }
}
}
namespace llvm {
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation() {
return EnableIGroupLP ? std::make_unique<IGroupLPDAGMutation>() : nullptr;
}
// Factory for the SCHED_BARRIER mutation. Unlike createIGroupLPDAGMutation(),
// this always returns a new mutation object.
std::unique_ptr<ScheduleDAGMutation> createSchedBarrierDAGMutation() {
return std::make_unique<SchedBarrierDAGMutation>();
}
}