#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "si-opt-vgpr-liverange"
namespace {
class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
LiveVariables *LV = nullptr;
MachineDominatorTree *MDT = nullptr;
const MachineLoopInfo *Loops = nullptr;
MachineRegisterInfo *MRI = nullptr;
public:
static char ID;
MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
void collectElseRegionBlocks(MachineBasicBlock *Flow,
MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &) const;
void
collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
SmallVectorImpl<Register> &CandidateRegs) const;
void collectWaterfallCandidateRegisters(
MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
SmallSetVector<Register, 16> &CandidateRegs,
SmallSetVector<MachineBasicBlock *, 2> &Blocks,
SmallVectorImpl<MachineInstr *> &Instructions) const;
void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &Uses) const;
void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
MachineBasicBlock *Flow) const;
void updateLiveRangeInElseRegion(
Register Reg, Register NewReg, MachineBasicBlock *Flow,
MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
void
optimizeLiveRange(Register Reg, MachineBasicBlock *If,
MachineBasicBlock *Flow, MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
void optimizeWaterfallLiveRange(
Register Reg, MachineBasicBlock *LoopHeader,
SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
SmallVectorImpl<MachineInstr *> &Instructions) const;
SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return "SI Optimize VGPR LiveRange";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveVariables>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
}
MachineFunctionProperties getClearedProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoPHIs);
}
};
}
MachineBasicBlock *
SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
for (auto &BR : MBB->terminators()) {
if (BR.getOpcode() == AMDGPU::SI_ELSE)
return BR.getOperand(2).getMBB();
}
return nullptr;
}
void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
MachineBasicBlock *Flow, MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
assert(Flow != Endif);
MachineBasicBlock *MBB = Endif;
unsigned Cur = 0;
while (MBB) {
for (auto *Pred : MBB->predecessors()) {
if (Pred != Flow && !Blocks.contains(Pred))
Blocks.insert(Pred);
}
if (Cur < Blocks.size())
MBB = Blocks[Cur++];
else
MBB = nullptr;
}
LLVM_DEBUG({
dbgs() << "Found Else blocks: ";
for (auto *MBB : Blocks)
dbgs() << printMBBReference(*MBB) << ' ';
dbgs() << '\n';
});
}
void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
Register Reg, MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &Uses) const {
for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
if (UseMI.getParent() == MBB && !UseMI.isPHI())
Uses.push_back(&UseMI);
}
}
void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
SmallVectorImpl<Register> &CandidateRegs) const {
SmallSet<Register, 8> KillsInElse;
for (auto *Else : ElseBlocks) {
for (auto &MI : Else->instrs()) {
if (MI.isDebugInstr())
continue;
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg() || MO.isDef())
continue;
Register MOReg = MO.getReg();
if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
continue;
if (MO.readsReg()) {
LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
KillsInElse.insert(MOReg);
} else {
LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
<< " as Live in Endif\n");
}
}
}
}
}
}
for (auto &MI : Endif->phis()) {
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
auto &MO = MI.getOperand(Idx);
auto *Pred = MI.getOperand(Idx + 1).getMBB();
if (Pred == Flow)
continue;
assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
if (!MO.isReg() || !MO.getReg() || MO.isUndef())
continue;
Register Reg = MO.getReg();
if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
continue;
LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
if (VI.isLiveIn(*Endif, Reg, *MRI)) {
LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
<< " as Live in Endif\n");
continue;
}
const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
KillsInElse.insert(Reg);
}
}
auto IsLiveThroughThen = [&](Register Reg) {
for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
++I) {
if (!I->readsReg())
continue;
auto *UseMI = I->getParent();
auto *UseMBB = UseMI->getParent();
if (UseMBB == Flow || UseMBB == Endif) {
if (!UseMI->isPHI())
return true;
auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
if ((UseMBB == Flow && IncomingMBB != If) ||
(UseMBB == Endif && IncomingMBB == Flow))
return true;
}
}
return false;
};
for (auto Reg : KillsInElse) {
if (!IsLiveThroughThen(Reg))
CandidateRegs.push_back(Reg);
}
}
void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
SmallSetVector<Register, 16> &CandidateRegs,
SmallSetVector<MachineBasicBlock *, 2> &Blocks,
SmallVectorImpl<MachineInstr *> &Instructions) const {
auto *MBB = LoopHeader;
for (;;) {
Blocks.insert(MBB);
for (auto &MI : *MBB) {
if (MI.isDebugInstr())
continue;
Instructions.push_back(&MI);
}
if (MBB == LoopEnd)
break;
if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
(MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
return;
}
MBB = *MBB->succ_begin();
}
for (auto *I : Instructions) {
auto &MI = *I;
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg() || MO.isDef())
continue;
Register MOReg = MO.getReg();
if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
continue;
if (MO.readsReg()) {
MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
bool IsUsed = false;
for (auto *Succ : LoopEnd->successors()) {
if (!Blocks.contains(Succ) &&
OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
IsUsed = true;
break;
}
}
if (!IsUsed) {
LLVM_DEBUG(dbgs() << "Found candidate reg: "
<< printReg(MOReg, TRI, 0, MRI) << '\n');
CandidateRegs.insert(MOReg);
} else {
LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
<< printReg(MOReg, TRI, 0, MRI) << '\n');
}
}
}
}
}
}
void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
SetVector<MachineBasicBlock *> Blocks;
SmallVector<MachineBasicBlock *> WorkList({If});
while (!WorkList.empty()) {
auto *MBB = WorkList.pop_back_val();
for (auto *Succ : MBB->successors()) {
if (Succ != Flow && !Blocks.contains(Succ)) {
WorkList.push_back(Succ);
Blocks.insert(Succ);
}
}
}
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
for (MachineBasicBlock *MBB : Blocks) {
LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
<< '\n');
OldVarInfo.AliveBlocks.reset(MBB->getNumber());
}
SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
++I) {
auto *UseMI = I->getParent();
if (UseMI->isPHI() && I->readsReg()) {
if (Blocks.contains(UseMI->getParent()))
PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
}
}
for (MachineBasicBlock *MBB : Blocks) {
SmallVector<MachineInstr *> Uses;
findNonPHIUsesInBlock(Reg, MBB, Uses);
if (Uses.size() == 1) {
LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
<< printMBBReference(*MBB) << '\n');
LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
} else if (Uses.size() > 1) {
LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
<< printMBBReference(*MBB) << '\n');
for (MachineInstr &MI : *MBB) {
if (llvm::is_contained(Uses, &MI))
LV->HandleVirtRegUse(Reg, MBB, MI);
}
}
if (PHIIncoming.contains(MBB))
LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
MBB);
}
for (auto *MI : OldVarInfo.Kills) {
if (Blocks.contains(MI->getParent()))
MI->addRegisterKilled(Reg, TRI);
}
}
void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
Register Reg, Register NewReg, MachineBasicBlock *Flow,
MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
for (auto *MBB : ElseBlocks) {
unsigned BBNum = MBB->getNumber();
if (OldVarInfo.AliveBlocks.test(BBNum)) {
NewVarInfo.AliveBlocks.set(BBNum);
LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
<< '\n');
OldVarInfo.AliveBlocks.reset(BBNum);
}
}
auto I = OldVarInfo.Kills.begin();
while (I != OldVarInfo.Kills.end()) {
if (ElseBlocks.contains((*I)->getParent())) {
NewVarInfo.Kills.push_back(*I);
I = OldVarInfo.Kills.erase(I);
} else {
++I;
}
}
}
void SIOptimizeVGPRLiveRange::optimizeLiveRange(
Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
const auto *RC = MRI->getRegClass(Reg);
Register NewReg = MRI->createVirtualRegister(RC);
Register UndefReg = MRI->createVirtualRegister(RC);
MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
TII->get(TargetOpcode::PHI), NewReg);
for (auto *Pred : Flow->predecessors()) {
if (Pred == If)
PHI.addReg(Reg).addMBB(Pred);
else
PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
}
for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
auto *UseMI = O.getParent();
auto *UseBlock = UseMI->getParent();
if (UseBlock == Endif) {
assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
O.setReg(NewReg);
continue;
}
if (ElseBlocks.contains(UseBlock))
O.setReg(NewReg);
}
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
OldVarInfo.AliveBlocks.reset(Flow->getNumber());
updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
updateLiveRangeInThenRegion(Reg, If, Flow);
}
void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
Register Reg, MachineBasicBlock *LoopHeader,
SmallSetVector<MachineBasicBlock *, 2> &Blocks,
SmallVectorImpl<MachineInstr *> &Instructions) const {
LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
const auto *RC = MRI->getRegClass(Reg);
Register NewReg = MRI->createVirtualRegister(RC);
Register UndefReg = MRI->createVirtualRegister(RC);
for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
auto *UseMI = O.getParent();
auto *UseBlock = UseMI->getParent();
if (Blocks.contains(UseBlock))
O.setReg(NewReg);
}
MachineInstrBuilder PHI =
BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
TII->get(TargetOpcode::PHI), NewReg);
for (auto *Pred : LoopHeader->predecessors()) {
if (Blocks.contains(Pred))
PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
else
PHI.addReg(Reg).addMBB(Pred);
}
LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
MachineInstr *Kill = nullptr;
for (auto *MI : reverse(Instructions)) {
if (MI->readsRegister(NewReg, TRI)) {
MI->addRegisterKilled(NewReg, TRI);
NewVarInfo.Kills.push_back(MI);
Kill = MI;
break;
}
}
assert(Kill && "Failed to find last usage of register in loop");
MachineBasicBlock *KillBlock = Kill->getParent();
bool PostKillBlock = false;
for (auto *Block : Blocks) {
auto BBNum = Block->getNumber();
OldVarInfo.AliveBlocks.reset(BBNum);
PostKillBlock |= (Block == KillBlock);
if (PostKillBlock) {
NewVarInfo.AliveBlocks.reset(BBNum);
} else if (Block != LoopHeader) {
NewVarInfo.AliveBlocks.set(BBNum);
}
}
}
char SIOptimizeVGPRLiveRange::ID = 0;
INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
"SI Optimize VGPR LiveRange", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
"SI Optimize VGPR LiveRange", false, false)
char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
return new SIOptimizeVGPRLiveRange();
}
bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MDT = &getAnalysis<MachineDominatorTree>();
Loops = &getAnalysis<MachineLoopInfo>();
LV = &getAnalysis<LiveVariables>();
MRI = &MF.getRegInfo();
if (skipFunction(MF.getFunction()))
return false;
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (auto &MI : MBB.terminators()) {
if (MI.getOpcode() == AMDGPU::SI_IF) {
MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
auto *Endif = getElseTarget(IfTarget);
if (!Endif)
continue;
if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
continue;
SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
SmallVector<Register> CandidateRegs;
LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
<< printMBBReference(MBB) << ' '
<< printMBBReference(*IfTarget) << ' '
<< printMBBReference(*Endif) << '\n');
collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
CandidateRegs);
MadeChange |= !CandidateRegs.empty();
for (auto Reg : CandidateRegs)
optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
} else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
auto *LoopHeader = MI.getOperand(0).getMBB();
auto *LoopEnd = &MBB;
LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
<< printMBBReference(*LoopHeader) << '\n');
SmallSetVector<Register, 16> CandidateRegs;
SmallVector<MachineInstr *, 16> Instructions;
SmallSetVector<MachineBasicBlock *, 2> Blocks;
collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
Blocks, Instructions);
MadeChange |= !CandidateRegs.empty();
for (auto Reg : CandidateRegs)
optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
}
}
}
return MadeChange;
}