#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
#define DEBUG_TYPE "si-insert-hard-clauses"
namespace {
// Upper bound on ClauseInfo::Length for a single hard clause.
// runOnMachineFunction closes the current clause once it reaches this size,
// and emitClause asserts that the bound is respected.
// NOTE(review): presumably dictated by the S_CLAUSE length encoding / hardware
// documentation -- confirm against the ISA guide before changing.
constexpr unsigned MaxInstructionsInClause = 63U;
/// Classification of a machine instruction with respect to hard clauses.
///
/// Every value up to and including LAST_REAL_HARDCLAUSE_TYPE names a kind of
/// instruction that may start or extend a clause; the values after it describe
/// how non-clausable instructions interact with a clause that is in progress.
/// The relative order matters: callers compare against
/// LAST_REAL_HARDCLAUSE_TYPE with `<=`.
enum HardClauseType {
  // ---- Returned by getHardClauseType on the GFX10 path ----

  // isVMEM or segment-specific FLAT instructions.
  HARDCLAUSE_VMEM = 0,
  // Any other isFLAT instruction.
  HARDCLAUSE_FLAT,

  // ---- Returned by getHardClauseType on the GFX11+ path ----

  // isMIMG instructions that are neither BVH nor sampler based, split by
  // whether they may load, may store, or both (atomic).
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  // isMIMG instructions whose base opcode uses a sampler.
  HARDCLAUSE_MIMG_SAMPLE,
  // Non-MIMG isVMEM or segment-specific FLAT instructions, split the same way.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // Any other isFLAT instruction, split the same way.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // isMIMG instructions whose base opcode is flagged BVH.
  HARDCLAUSE_BVH,

  // ---- Independent of the generation ----

  // Not produced by getHardClauseType in this file.
  HARDCLAUSE_LDS,
  // isSMRD instructions.
  HARDCLAUSE_SMEM,
  // Not produced by getHardClauseType in this file.
  HARDCLAUSE_VALU,
  // Marker: the largest value that denotes a clausable instruction kind.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // ---- Values that never start a clause ----

  // Instructions tolerated inside a clause (S_NOP); they are only counted
  // towards the clause length when a clausable instruction follows them.
  HARDCLAUSE_INTERNAL = LAST_REAL_HARDCLAUSE_TYPE + 1,
  // Meta instructions; skipped entirely while a clause is being built.
  HARDCLAUSE_IGNORE,
  // Everything else; terminates the clause in progress.
  HARDCLAUSE_ILLEGAL,
};
/// Machine function pass that groups adjacent, compatible memory instructions
/// into hard clauses: an S_CLAUSE instruction is inserted in front of each
/// group and the group is bundled together with it.
class SIInsertHardClauses : public MachineFunctionPass {
public:
  static char ID;
  /// Subtarget of the function currently being processed. Assigned at the top
  /// of runOnMachineFunction and consulted by getHardClauseType.
  const GCNSubtarget *ST = nullptr;

  SIInsertHardClauses() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The pass only inserts S_CLAUSE and forms bundles inside a block.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Classify \p MI for clause formation.
  ///
  /// \returns a value <= LAST_REAL_HARDCLAUSE_TYPE when \p MI is a candidate
  /// clause member, HARDCLAUSE_INTERNAL for S_NOP, HARDCLAUSE_IGNORE for meta
  /// instructions and HARDCLAUSE_ILLEGAL for everything else. Requires ST to
  /// have been set.
  HardClauseType getHardClauseType(const MachineInstr &MI) {
    // Loads are always candidates; stores only when the subtarget asks for
    // stores to be clustered.
    if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
      if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
        if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
          if (ST->hasNSAClauseBug()) {
            // On subtargets with the NSA clause bug, NSA-encoded image
            // instructions must stay out of clauses.
            const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
            if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
              return HARDCLAUSE_ILLEGAL;
          }
          return HARDCLAUSE_VMEM;
        }
        if (SIInstrInfo::isFLAT(MI))
          return HARDCLAUSE_FLAT;
      } else {
        assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
        if (SIInstrInfo::isMIMG(MI)) {
          const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
          const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
              AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
          if (BaseInfo->BVH)
            return HARDCLAUSE_BVH;
          if (BaseInfo->Sampler)
            return HARDCLAUSE_MIMG_SAMPLE;
          // mayLoad && mayStore -> atomic, mayLoad only -> load, else store.
          return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
                                              : HARDCLAUSE_MIMG_LOAD
                              : HARDCLAUSE_MIMG_STORE;
        }
        if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
          return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
                                              : HARDCLAUSE_VMEM_LOAD
                              : HARDCLAUSE_VMEM_STORE;
        }
        if (SIInstrInfo::isFLAT(MI)) {
          return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
                                              : HARDCLAUSE_FLAT_LOAD
                              : HARDCLAUSE_FLAT_STORE;
        }
      }
      if (SIInstrInfo::isSMRD(MI))
        return HARDCLAUSE_SMEM;
    }

    // S_NOP is the only instruction treated as internal here.
    if (MI.getOpcode() == AMDGPU::S_NOP)
      return HARDCLAUSE_INTERNAL;
    if (MI.isMetaInstruction())
      return HARDCLAUSE_IGNORE;
    return HARDCLAUSE_ILLEGAL;
  }

  /// State of the clause currently being assembled within a basic block.
  struct ClauseInfo {
    /// Type shared by all non-internal instructions in the clause.
    HardClauseType Type = HARDCLAUSE_ILLEGAL;
    /// First instruction of the clause.
    MachineInstr *First = nullptr;
    /// Last non-internal instruction of the clause.
    MachineInstr *Last = nullptr;
    /// Number of instructions from First to Last, counting internal
    /// instructions between them but not those after Last.
    unsigned Length = 0;
    /// Internal instructions seen after Last. They are folded into Length
    /// only if another clause member is appended after them.
    unsigned TrailingInternalLength = 0;
    /// Base operands of *Last, used to decide whether the next instruction
    /// should be clustered with it.
    SmallVector<const MachineOperand *, 4> BaseOps;
  };

  /// Materialise the clause described by \p CI: insert S_CLAUSE (immediate is
  /// Length - 1) before CI.First and bundle it with everything up to CI.Last.
  ///
  /// \returns true if the block was modified; a clause with a single member
  /// (First == Last) is not emitted.
  bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
    if (CI.First == CI.Last)
      return false;
    assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");

    auto &MBB = *CI.First->getParent();
    auto ClauseMI =
        BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
            .addImm(CI.Length - 1);
    finalizeBundle(MBB, ClauseMI->getIterator(),
                   std::next(CI.Last->getIterator()));
    return true;
  }

  /// Scan every basic block of \p MF and emit hard clauses.
  ///
  /// \returns true if any S_CLAUSE was inserted. Does nothing when the
  /// function is skipped or the subtarget has no hard clauses.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;

    ST = &MF.getSubtarget<GCNSubtarget>();
    if (!ST->hasHardClauses())
      return false;

    const SIInstrInfo *SII = ST->getInstrInfo();
    const TargetRegisterInfo *TRI = ST->getRegisterInfo();

    bool Changed = false;
    for (auto &MBB : MF) {
      // Clauses never span basic blocks, so start from scratch in each one.
      ClauseInfo CI;
      for (auto &MI : MBB) {
        HardClauseType Type = getHardClauseType(MI);

        // Only BaseOps is of interest; the remaining outputs are discarded.
        int64_t Dummy1;
        bool Dummy2;
        unsigned Dummy3;
        SmallVector<const MachineOperand *, 4> BaseOps;
        if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
          if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
                                                  Dummy3, TRI)) {
            // Without base operands the instruction can never be clustered
            // with another one, so treat it as illegal.
            Type = HARDCLAUSE_ILLEGAL;
          }
        }

        // Internal and meta instructions neither end nor extend a clause.
        const bool IsTransparent =
            Type == HARDCLAUSE_INTERNAL || Type == HARDCLAUSE_IGNORE;
        // Appending MI also folds the pending trailing internal instructions
        // into the clause, so they must be counted against the limit too.
        // Checking only CI.Length would let the length step over the maximum
        // and never be caught by the equality test below.
        const bool WouldOverflow =
            CI.Length + CI.TrailingInternalLength >= MaxInstructionsInClause;

        if (CI.Length == MaxInstructionsInClause ||
            (CI.Length && !IsTransparent &&
             (WouldOverflow || Type != CI.Type ||
              // The cluster size arguments are fixed at 2 instead of
              // reflecting the real clause length.
              // NOTE(review): presumably so that size limits meant for the
              // scheduler never reject the pair -- confirm in SIInstrInfo.
              !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
          // Finish the current clause.
          Changed |= emitClause(CI, SII);
          CI = ClauseInfo();
        }

        if (CI.Length) {
          // Extend the current clause.
          if (Type != HARDCLAUSE_IGNORE) {
            if (Type == HARDCLAUSE_INTERNAL) {
              ++CI.TrailingInternalLength;
            } else {
              ++CI.Length;
              CI.Length += CI.TrailingInternalLength;
              CI.TrailingInternalLength = 0;
              CI.Last = &MI;
              CI.BaseOps = std::move(BaseOps);
            }
          }
        } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
          // Start a new clause with MI as its only member so far.
          CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
        }
      }

      // Finish the last clause of the basic block, if any.
      if (CI.Length)
        Changed |= emitClause(CI, SII);
    }

    return Changed;
  }
};
}
// Definition of the static pass identifier declared in the class.
char SIInsertHardClauses::ID = 0;
// Reference to the same identifier under the llvm namespace, so that the pass
// can be named without access to the class in the anonymous namespace.
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
// Register the pass under the DEBUG_TYPE name with the description
// "SI Insert Hard Clauses".
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// CFG-only and is-analysis flags -- confirm against PassSupport.h.
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
false, false)