#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "frame-info"
static cl::opt<bool> EnableRedZone("aarch64-redzone",
cl::desc("enable use of redzone on AArch64"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
ReverseCSRRestoreSeq("reverse-csr-restore-seq",
cl::desc("reverse the CSR restore sequence"),
cl::init(false), cl::Hidden);
static cl::opt<bool> StackTaggingMergeSetTag(
"stack-tagging-merge-settag",
cl::desc("merge settag instruction in function epilog"), cl::init(true),
cl::Hidden);
static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
cl::opt<bool> EnableHomogeneousPrologEpilog(
"homogeneous-prolog-epilog", cl::Hidden,
cl::desc("Emit homogeneous prologue and epilogue for the size "
"optimization (default = off)"));
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
static int64_t getArgumentStackToRestore(MachineFunction &MF,
MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
bool IsTailCallReturn = false;
if (MBB.end() != MBBI) {
unsigned RetOpcode = MBBI->getOpcode();
IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
RetOpcode == AArch64::TCRETURNri ||
RetOpcode == AArch64::TCRETURNriBTI;
}
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
int64_t ArgumentPopSize = 0;
if (IsTailCallReturn) {
MachineOperand &StackAdjust = MBBI->getOperand(1);
ArgumentPopSize = StackAdjust.getImm();
} else {
ArgumentPopSize = AFI->getArgumentStackToRestore();
}
return ArgumentPopSize;
}
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF);
bool AArch64FrameLowering::homogeneousPrologEpilog(
MachineFunction &MF, MachineBasicBlock *Exit) const {
if (!MF.getFunction().hasMinSize())
return false;
if (!EnableHomogeneousPrologEpilog)
return false;
if (ReverseCSRRestoreSeq)
return false;
if (EnableRedZone)
return false;
if (needsWinCFI(MF))
return false;
if (getSVEStackSize(MF))
return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
return false;
if (Exit && getArgumentStackToRestore(MF, *Exit))
return false;
return true;
}
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
}
static const unsigned DefaultSafeSPDisplacement = 255;
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
if (MI.isDebugInstr() || MI.isPseudo() ||
MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::ADDSXri)
continue;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
StackOffset Offset;
if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
AArch64FrameOffsetCannotUpdate)
return 0;
}
}
}
return DefaultSafeSPDisplacement;
}
TargetStackID::Value
AArch64FrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
static unsigned getFixedObjectSize(const MachineFunction &MF,
const AArch64FunctionInfo *AFI, bool IsWin64,
bool IsFunclet) {
if (!IsWin64 || IsFunclet) {
return AFI->getTailCallReservedStack();
} else {
if (AFI->getTailCallReservedStack() != 0)
report_fatal_error("cannot generate ABI-changing tail call for Win64");
const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
return alignTo(VarArgsArea + UnwindHelpObject, 16);
}
}
static StackOffset getSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const unsigned RedZoneSize =
Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
if (!RedZoneSize)
return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
uint64_t NumBytes = AFI->getLocalStackSize();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
getSVEStackSize(MF));
}
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
if (MF.hasEHFunclets())
return true;
if (MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
RegInfo->hasStackRealignment(MF))
return true;
if (!MFI.isMaxCallFrameSizeComputed() ||
MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
return true;
return false;
}
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo().hasVarSizedObjects();
}
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc DL = I->getDebugLoc();
unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
if (!hasReservedCallFrame(MF)) {
int64_t Amount = I->getOperand(0).getImm();
Amount = alignTo(Amount, getStackAlign());
if (!IsDestroy)
Amount = -Amount;
if (CalleePopAmount == 0) {
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(Amount), TII);
}
} else if (CalleePopAmount != 0) {
assert(CalleePopAmount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
}
return MBB.erase(I);
}
void AArch64FrameLowering::emitCalleeSavedGPRLocations(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
for (const auto &Info : CSI) {
if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
unsigned DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
int64_t Offset =
MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
void AArch64FrameLowering::emitCalleeSavedSVELocations(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
for (const auto &Info : CSI) {
if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
unsigned Reg = Info.getReg();
if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
continue;
StackOffset Offset =
StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
emitCalleeSavedGPRLocations(MBB, MBBI);
emitCalleeSavedSVELocations(MBB, MBBI);
}
static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
unsigned DwarfReg) {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg));
BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex);
}
void AArch64FrameLowering::resetCFIToInitialState(
MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const auto &TRI =
static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo());
const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
DebugLoc DL;
MachineBasicBlock::iterator InsertPt = MBB.begin();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
if (MFI.shouldSignReturnAddress()) {
CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
}
if (needsShadowCallStackPrologueEpilogue(MF))
insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
TRI.getDwarfRegNum(AArch64::X18, true));
const std::vector<CalleeSavedInfo> &CSI =
MF.getFrameInfo().getCalleeSavedInfo();
for (const auto &Info : CSI) {
unsigned Reg = Info.getReg();
if (!TRI.regNeedsCFI(Reg, Reg))
continue;
insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
TRI.getDwarfRegNum(Reg, true));
}
}
static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
bool SVE) {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
for (const auto &Info : CSI) {
if (SVE !=
(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
continue;
unsigned Reg = Info.getReg();
if (SVE &&
!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
continue;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
}
void AArch64FrameLowering::emitCalleeSavedGPRRestores(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
emitCalleeSavedRestores(MBB, MBBI, false);
}
void AArch64FrameLowering::emitCalleeSavedSVERestores(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
emitCalleeSavedRestores(MBB, MBBI, true);
}
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
return 0;
#define CASE(n) \
case AArch64::W##n: \
case AArch64::X##n: \
return AArch64::X##n
CASE(0);
CASE(1);
CASE(2);
CASE(3);
CASE(4);
CASE(5);
CASE(6);
CASE(7);
CASE(8);
CASE(9);
CASE(10);
CASE(11);
CASE(12);
CASE(13);
CASE(14);
CASE(15);
CASE(16);
CASE(17);
CASE(18);
#undef CASE
#define CASE(n) \
case AArch64::B##n: \
case AArch64::H##n: \
case AArch64::S##n: \
case AArch64::D##n: \
case AArch64::Q##n: \
return HasSVE ? AArch64::Z##n : AArch64::Q##n
CASE(0);
CASE(1);
CASE(2);
CASE(3);
CASE(4);
CASE(5);
CASE(6);
CASE(7);
CASE(8);
CASE(9);
CASE(10);
CASE(11);
CASE(12);
CASE(13);
CASE(14);
CASE(15);
CASE(16);
CASE(17);
CASE(18);
CASE(19);
CASE(20);
CASE(21);
CASE(22);
CASE(23);
CASE(24);
CASE(25);
CASE(26);
CASE(27);
CASE(28);
CASE(29);
CASE(30);
CASE(31);
#undef CASE
}
}
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
const MachineFunction &MF = *MBB.getParent();
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
BitVector GPRsToZero(TRI.getNumRegs());
BitVector FPRsToZero(TRI.getNumRegs());
bool HasSVE = STI.hasSVE();
for (MCRegister Reg : RegsToZero.set_bits()) {
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
GPRsToZero.set(XReg);
} else if (AArch64::FPR128RegClass.contains(Reg) ||
AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR32RegClass.contains(Reg) ||
AArch64::FPR16RegClass.contains(Reg) ||
AArch64::FPR8RegClass.contains(Reg)) {
if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
FPRsToZero.set(XReg);
}
}
const AArch64InstrInfo &TII = *STI.getInstrInfo();
for (MCRegister Reg : GPRsToZero.set_bits())
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), Reg).addImm(0);
for (MCRegister Reg : FPRsToZero.set_bits())
if (HasSVE)
BuildMI(MBB, MBBI, DL, TII.get(AArch64::DUP_ZI_D), Reg)
.addImm(0)
.addImm(0);
else
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVIv2d_ns), Reg).addImm(0);
if (HasSVE) {
for (MCRegister PReg :
{AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
AArch64::P15}) {
if (RegsToZero[PReg])
BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
}
}
}
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
MachineFunction *MF = MBB->getParent();
if (&MF->front() == MBB)
return AArch64::X9;
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
LiveRegs.addLiveIns(*MBB);
const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (LiveRegs.available(MRI, AArch64::X9))
return AArch64::X9;
for (unsigned Reg : AArch64::GPR64RegClass) {
if (LiveRegs.available(MRI, Reg))
return Reg;
}
return AArch64::NoRegister;
}
bool AArch64FrameLowering::canUseAsPrologue(
const MachineBasicBlock &MBB) const {
const MachineFunction *MF = MBB.getParent();
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (!RegInfo->hasStackRealignment(*MF))
return true;
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}
static bool windowsRequiresStackProbe(MachineFunction &MF,
uint64_t StackSizeInBytes) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (!Subtarget.isTargetWindows())
return false;
const Function &F = MF.getFunction();
unsigned StackProbeSize = 4096;
if (F.hasFnAttribute("stack-probe-size"))
F.getFnAttribute("stack-probe-size")
.getValueAsString()
.getAsInteger(0, StackProbeSize);
return (StackSizeInBytes >= StackProbeSize) &&
!F.hasFnAttribute("no-stack-arg-probe");
}
static bool needsWinCFI(const MachineFunction &MF) {
const Function &F = MF.getFunction();
return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
F.needsUnwindTableEntry();
}
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, uint64_t StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (homogeneousPrologEpilog(MF))
return false;
if (AFI->getLocalStackSize() == 0)
return false;
if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
MF.getFunction().hasOptSize())
return false;
if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
return false;
if (MFI.hasVarSizedObjects())
return false;
if (RegInfo->hasStackRealignment(MF))
return false;
if (canUseRedZone(MF))
return false;
if (getSVEStackSize(MF))
return false;
return true;
}
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
return false;
if (MBB.empty())
return true;
MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastI != Begin) {
--LastI;
if (LastI->isTransient())
continue;
if (!LastI->getFlag(MachineInstr::FrameDestroy))
break;
}
switch (LastI->getOpcode()) {
case AArch64::STGloop:
case AArch64::STZGloop:
case AArch64::STGOffset:
case AArch64::STZGOffset:
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
return false;
default:
return true;
}
llvm_unreachable("unreachable");
}
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
const TargetInstrInfo &TII,
MachineInstr::MIFlag Flag) {
unsigned Opc = MBBI->getOpcode();
MachineBasicBlock *MBB = MBBI->getParent();
MachineFunction &MF = *MBB->getParent();
DebugLoc DL = MBBI->getDebugLoc();
unsigned ImmIdx = MBBI->getNumOperands() - 1;
int Imm = MBBI->getOperand(ImmIdx).getImm();
MachineInstrBuilder MIB;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
switch (Opc) {
default:
llvm_unreachable("No SEH Opcode for this instruction");
case AArch64::LDPDpost:
Imm = -Imm;
LLVM_FALLTHROUGH;
case AArch64::STPDpre: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
.addImm(Reg0)
.addImm(Reg1)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::LDPXpost:
Imm = -Imm;
LLVM_FALLTHROUGH;
case AArch64::STPXpre: {
Register Reg0 = MBBI->getOperand(1).getReg();
Register Reg1 = MBBI->getOperand(2).getReg();
if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
.addImm(Imm * 8)
.setMIFlag(Flag);
else
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
.addImm(RegInfo->getSEHRegNum(Reg0))
.addImm(RegInfo->getSEHRegNum(Reg1))
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::LDRDpost:
Imm = -Imm;
LLVM_FALLTHROUGH;
case AArch64::STRDpre: {
unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
.addImm(Reg)
.addImm(Imm)
.setMIFlag(Flag);
break;
}
case AArch64::LDRXpost:
Imm = -Imm;
LLVM_FALLTHROUGH;
case AArch64::STRXpre: {
unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
.addImm(Reg)
.addImm(Imm)
.setMIFlag(Flag);
break;
}
case AArch64::STPDi:
case AArch64::LDPDi: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
.addImm(Reg0)
.addImm(Reg1)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::STPXi:
case AArch64::LDPXi: {
Register Reg0 = MBBI->getOperand(0).getReg();
Register Reg1 = MBBI->getOperand(1).getReg();
if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
.addImm(Imm * 8)
.setMIFlag(Flag);
else
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
.addImm(RegInfo->getSEHRegNum(Reg0))
.addImm(RegInfo->getSEHRegNum(Reg1))
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::STRXui:
case AArch64::LDRXui: {
int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
.addImm(Reg)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::STRDui:
case AArch64::LDRDui: {
unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
.addImm(Reg)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
}
auto I = MBB->insertAfter(MBBI, MIB);
return I;
}
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
unsigned LocalStackSize) {
MachineOperand *ImmOpnd = nullptr;
unsigned ImmIdx = MBBI->getNumOperands() - 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Fix the offset in the SEH instruction");
case AArch64::SEH_SaveFPLR:
case AArch64::SEH_SaveRegP:
case AArch64::SEH_SaveReg:
case AArch64::SEH_SaveFRegP:
case AArch64::SEH_SaveFReg:
ImmOpnd = &MBBI->getOperand(ImmIdx);
break;
}
if (ImmOpnd)
ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
int CFAOffset = 0) {
unsigned NewOpc;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
break;
case AArch64::STPQi:
NewOpc = AArch64::STPQpre;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
break;
case AArch64::STRQui:
NewOpc = AArch64::STRQpre;
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
break;
case AArch64::LDPQi:
NewOpc = AArch64::LDPQpost;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
break;
case AArch64::LDRQui:
NewOpc = AArch64::LDRQpost;
break;
}
if (NeedsWinCFI) {
auto SEH = std::next(MBBI);
if (AArch64InstrInfo::isSEHInstruction(*SEH))
SEH->eraseFromParent();
}
TypeSize Scale = TypeSize::Fixed(1);
unsigned Width;
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
(void)Success;
assert(Success && "unknown load/store opcode");
MachineFunction &MF = *MBB.getParent();
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
false, false, nullptr, EmitCFI,
StackOffset::getFixed(CFAOffset));
return std::prev(MBBI);
}
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
unsigned OpndIdx = 0;
for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
++OpndIdx)
MIB.add(MBBI->getOperand(OpndIdx));
assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
"Unexpected immediate offset in first/last callee-save save/restore "
"instruction!");
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
assert(CSStackSizeInc % Scale == 0);
MIB.addImm(CSStackSizeInc / (int)Scale);
MIB.setMIFlags(MBBI->getFlags());
MIB.setMemRefs(MBBI->memoperands());
if (NeedsWinCFI) {
*HasWinCFI = true;
InsertSEH(*MIB, *TII, FrameFlag);
}
if (EmitCFI) {
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(FrameFlag);
}
return std::prev(MBB.erase(MBBI));
}
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
uint64_t LocalStackSize,
bool NeedsWinCFI,
bool *HasWinCFI) {
if (AArch64InstrInfo::isSEHInstruction(MI))
return;
unsigned Opc = MI.getOpcode();
unsigned Scale;
switch (Opc) {
case AArch64::STPXi:
case AArch64::STRXui:
case AArch64::STPDi:
case AArch64::STRDui:
case AArch64::LDPXi:
case AArch64::LDRXui:
case AArch64::LDPDi:
case AArch64::LDRDui:
Scale = 8;
break;
case AArch64::STPQi:
case AArch64::STRQui:
case AArch64::LDPQi:
case AArch64::LDRQui:
Scale = 16;
break;
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
}
unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
assert(LocalStackSize % Scale == 0);
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
if (NeedsWinCFI) {
*HasWinCFI = true;
auto MBBI = std::next(MachineBasicBlock::iterator(MI));
assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
"Expecting a SEH instruction");
fixupSEHOpcode(MBBI, LocalStackSize);
}
}
static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
case AArch64::STR_ZXI:
case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
case AArch64::LDR_PXI:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
}
}
static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) {
if (!(llvm::any_of(
MF.getFrameInfo().getCalleeSavedInfo(),
[](const auto &Info) { return Info.getReg() == AArch64::LR; }) &&
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)))
return false;
if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
report_fatal_error("Must reserve x18 to use shadow call stack");
return true;
}
static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool NeedsWinCFI,
bool NeedsUnwindInfo) {
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
.addReg(AArch64::X18, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::X18)
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
MBB.addLiveIn(AArch64::X18);
if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsUnwindInfo) {
static const char CFIInst[] = {
dwarf::DW_CFA_val_expression,
18, 2, static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
static_cast<char>(-8) & 0x7f, };
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
nullptr, StringRef(CFIInst, sizeof(CFIInst))));
BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
}
}
static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) {
BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
.addReg(AArch64::X18, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::X18)
.addImm(-8)
.setMIFlag(MachineInstr::FrameDestroy);
if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo()) {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const Function &F = MF.getFunction();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool EmitCFI = AFI->needsDwarfUnwindInfo();
bool HasFP = hasFP(MF);
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
bool IsFunclet = MBB.isEHFuncletEntry();
AFI->setHasRedZone(false);
DebugLoc DL;
const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
if (needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
MFnI.needsDwarfUnwindInfo());
if (MFnI.shouldSignReturnAddress()) {
unsigned PACI;
if (MFnI.shouldSignWithBKey()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
.setMIFlag(MachineInstr::FrameSetup);
PACI = Subtarget.hasPAuth() ? AArch64::PACIB : AArch64::PACIBSP;
} else {
PACI = Subtarget.hasPAuth() ? AArch64::PACIA : AArch64::PACIASP;
}
auto MI = BuildMI(MBB, MBBI, DL, TII->get(PACI));
if (Subtarget.hasPAuth())
MI.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP, RegState::InternalRead);
MI.setMIFlag(MachineInstr::FrameSetup);
if (EmitCFI) {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
if (EmitCFI && MFnI.isMTETagged()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
.setMIFlag(MachineInstr::FrameSetup);
}
if (HasFP && AFI->hasSwiftAsyncContext()) {
switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
case SwiftAsyncFramePointerMode::DeploymentBased:
if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
.addExternalSymbol("swift_async_extendedFramePointerFlags",
AArch64II::MO_GOT);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
.addUse(AArch64::FP)
.addUse(AArch64::X16)
.addImm(Subtarget.isTargetILP32() ? 32 : 0);
break;
}
LLVM_FALLTHROUGH;
case SwiftAsyncFramePointerMode::Always:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
.addUse(AArch64::FP)
.addImm(0x1100)
.setMIFlag(MachineInstr::FrameSetup);
break;
case SwiftAsyncFramePointerMode::Never:
break;
}
}
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
Optional<int> TBPI = AFI->getTaggedBasePointerIndex();
if (TBPI)
AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
else
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
const StackOffset &SVEStackSize = getSVEStackSize(MF);
int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
: MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
assert(!SVEStackSize &&
"unexpected function without stack frame but with SVE objects");
AFI->setLocalStackSize(NumBytes);
if (!NumBytes)
return;
if (canUseRedZone(MF)) {
AFI->setHasRedZone(true);
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (EmitCFI) {
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
return;
}
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
EmitCFI);
NumBytes = 0;
} else if (HomPrologEpilog) {
NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
EmitCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
++MBBI;
}
if (!IsFunclet && HasFP) {
int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
if (AFI->hasSwiftAsyncContext()) {
const auto &Attrs = MF.getFunction().getAttributes();
bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
if (HaveInitialContext)
MBB.addLiveIn(AArch64::X22);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
.addUse(HaveInitialContext ? AArch64::X22 : AArch64::XZR)
.addUse(AArch64::SP)
.addImm(FPOffset - 8)
.setMIFlags(MachineInstr::FrameSetup);
}
if (HomPrologEpilog) {
auto Prolog = MBBI;
--Prolog;
assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
StackOffset::getFixed(FPOffset), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
}
if (EmitCFI) {
const int OffsetToFirstCalleeSaveFromFP =
AFI->getCalleeSaveBaseToFrameRecordOffset() -
AFI->getCalleeSavedStackSize();
Register FramePtr = RegInfo->getFrameRegister(MF);
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
if (EmitCFI)
emitCalleeSavedGPRLocations(MBB, MBBI);
if (windowsRequiresStackProbe(MF, NumBytes)) {
uint64_t NumWords = NumBytes >> 4;
if (NeedsWinCFI) {
HasWinCFI = true;
if (NumBytes >= (1 << 28))
report_fatal_error("Stack size cannot exceed 256MB for stack "
"unwinding purposes");
uint32_t LowNumWords = NumWords & 0xFFFF;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
.addImm(LowNumWords)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
if ((NumWords & 0xFFFF0000) != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
.addReg(AArch64::X15)
.addImm((NumWords & 0xFFFF0000) >> 16) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
} else {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
.addImm(NumWords)
.setMIFlags(MachineInstr::FrameSetup);
}
switch (MF.getTarget().getCodeModel()) {
case CodeModel::Tiny:
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol("__chkstk")
.addReg(AArch64::X15, RegState::Implicit)
.addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
.addReg(AArch64::X16, RegState::Define)
.addExternalSymbol("__chkstk")
.addExternalSymbol("__chkstk")
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
.addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
break;
}
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
.addReg(AArch64::SP, RegState::Kill)
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
.addImm(NumBytes)
.setMIFlag(MachineInstr::FrameSetup);
}
NumBytes = 0;
}
StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
++MBBI;
CalleeSavesEnd = MBBI;
AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
AllocateAfter = SVEStackSize - AllocateBefore;
}
emitFrameOffset(
MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
MachineInstr::FrameSetup, false, false, nullptr,
EmitCFI && !HasFP && AllocateBefore,
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
if (EmitCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
nullptr, EmitCFI && !HasFP && AllocateAfter,
AllocateBefore + StackOffset::getFixed(
(int64_t)MFI.getStackSize() - NumBytes));
if (NumBytes) {
const bool NeedsRealignment =
!IsFunclet && RegInfo->hasStackRealignment(MF);
unsigned scratchSPReg = AArch64::SP;
if (NeedsRealignment) {
scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
assert(scratchSPReg != AArch64::NoRegister);
}
if (!canUseRedZone(MF)) {
emitFrameOffset(
MBB, MBBI, DL, scratchSPReg, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
SVEStackSize +
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
}
if (NeedsRealignment) {
const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
assert(NrBitsToZero > 1);
assert(scratchSPReg != AArch64::SP);
uint32_t andMaskEncoded = (1 << 12) | ((64 - NrBitsToZero) << 6) | ((64 - NrBitsToZero - 1) << 0);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
AFI->setStackRealigned(true);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
.addImm(NumBytes & andMaskEncoded)
.setMIFlag(MachineInstr::FrameSetup);
}
}
}
if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
}
if (NeedsWinCFI && HasWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
if (IsFunclet && F.hasPersonalityFn()) {
EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
if (isAsynchronousEHPersonality(Per)) {
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
.addReg(AArch64::X1)
.setMIFlag(MachineInstr::FrameSetup);
MBB.addLiveIn(AArch64::X1);
}
}
}
static void InsertReturnAddressAuth(MachineFunction &MF,
MachineBasicBlock &MBB) {
const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
if (!MFI.shouldSignReturnAddress())
return;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
MBBI->getOpcode() == AArch64::RET_ReallyLR) {
BuildMI(MBB, MBBI, DL,
TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
.copyImplicitOps(*MBBI);
MBB.erase(MBBI);
} else {
BuildMI(
MBB, MBBI, DL,
TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
}
static bool isFuncletReturnInstr(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::CATCHRET:
case AArch64::CLEANUPRET:
return true;
}
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool NeedsWinCFI = needsWinCFI(MF);
bool EmitCFI = MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo();
bool HasWinCFI = false;
bool IsFunclet = false;
auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
IsFunclet = isFuncletReturnInstr(*MBBI);
}
auto FinishingTouches = make_scope_exit([&]() {
InsertReturnAddressAuth(MF, MBB);
if (needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
if (EmitCFI)
emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
});
int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
: MFI.getStackSize();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
int64_t AfterCSRPopSize = ArgumentStackToRestore;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
if (homogeneousPrologEpilog(MF, &MBB)) {
assert(!NeedsWinCFI);
auto LastPopI = MBB.getFirstTerminator();
if (LastPopI != MBB.begin()) {
auto HomogeneousEpilog = std::prev(LastPopI);
if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
LastPopI = HomogeneousEpilog;
}
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AFI->getLocalStackSize()), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI);
assert(AfterCSRPopSize == 0);
return;
}
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
AArch64InstrInfo::isSEHInstruction(*Pop))
Pop = std::prev(Pop);
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
MachineInstr::FrameDestroy, PrologueSaveSize);
} else {
AfterCSRPopSize += PrologueSaveSize;
CombineAfterCSRBump = true;
}
}
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
--LastPopI;
if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
IsSVECalleeSave(LastPopI)) {
++LastPopI;
break;
} else if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
}
if (MF.hasWinCFI()) {
HasWinCFI = true;
BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
}
if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
case SwiftAsyncFramePointerMode::DeploymentBased:
LLVM_FALLTHROUGH;
case SwiftAsyncFramePointerMode::Always:
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
AArch64::FP)
.addUse(AArch64::FP)
.addImm(0x10fe)
.setMIFlag(MachineInstr::FrameDestroy);
break;
case SwiftAsyncFramePointerMode::Never:
break;
}
}
const StackOffset &SVEStackSize = getSVEStackSize(MF);
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
if (EmitCFI && hasFP(MF)) {
const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes));
if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
return;
}
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
IsSVECalleeSave(std::prev(RestoreBegin)))
--RestoreBegin;
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
StackOffset::getScalable(CalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
if (SVEStackSize) {
if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
StackOffset::getScalable(-CalleeSavedSize), TII,
MachineInstr::FrameDestroy);
}
} else {
if (AFI->getSVECalleeSavedStackSize()) {
emitFrameOffset(
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
false, false, nullptr, EmitCFI && !hasFP(MF),
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
NumBytes = 0;
}
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
false, nullptr, EmitCFI && !hasFP(MF),
SVEStackSize +
StackOffset::getFixed(NumBytes + PrologueSaveSize));
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
false, nullptr, EmitCFI && !hasFP(MF),
DeallocateAfter +
StackOffset::getFixed(NumBytes + PrologueSaveSize));
}
if (EmitCFI)
emitCalleeSavedSVERestores(MBB, RestoreEnd);
}
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
if (RedZone && AfterCSRPopSize == 0)
return;
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += AfterCSRPopSize;
emitFrameOffset(
MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(StackRestoreBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
}
return;
}
NumBytes = 0;
}
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
emitFrameOffset(
MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI);
if (EmitCFI && hasFP(MF)) {
const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize));
BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
if (AfterCSRPopSize) {
assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
"interrupt may have clobbered");
emitFrameOffset(
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
}
if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
}
StackOffset
AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const {
return resolveFrameIndexReference(
MF, FI, FrameReg,
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
false);
}
StackOffset
AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
int FI) const {
return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
}
static StackOffset getFPOffset(const MachineFunction &MF,
int64_t ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, false);
int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
int64_t FPAdjust =
CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
}
static StackOffset getStackOffset(const MachineFunction &MF,
int64_t ObjectOffset) {
const auto &MFI = MF.getFrameInfo();
return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
}
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
? getFPOffset(MF, ObjectOffset).getFixed()
: getStackOffset(MF, ObjectOffset).getFixed();
}
StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
Register &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
const StackOffset &SVEStackSize = getSVEStackSize(MF);
bool UseFP = false;
if (AFI->hasStackFrame() && !isSVE) {
PreferFP &= !SVEStackSize;
if (isFixed) {
UseFP = hasFP(MF);
} else if (isCSR && RegInfo->hasStackRealignment(MF)) {
assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
UseFP = true;
} else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
bool FPOffsetFits = !ForSimm || FPOffset >= -256;
PreferFP |= Offset > -FPOffset && !SVEStackSize;
if (MFI.hasVarSizedObjects()) {
bool CanUseBP = RegInfo->hasBasePointer(MF);
if (FPOffsetFits && CanUseBP) UseFP = PreferFP;
else if (!CanUseBP) UseFP = true;
} else if (FPOffset >= 0) {
UseFP = true;
} else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
(void) Subtarget;
assert(
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
"Funclets should only be present on Win64");
UseFP = true;
} else {
if (FPOffsetFits && PreferFP) UseFP = true;
}
}
}
assert(
((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");
if (isSVE) {
StackOffset FPOffset =
StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
StackOffset SPOffset =
SVEStackSize +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
if (hasFP(MF) && (SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
RegInfo->hasStackRealignment(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
: (unsigned)AArch64::SP;
return SPOffset;
}
StackOffset ScalableOffset = {};
if (UseFP && !(isFixed || isCSR))
ScalableOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
ScalableOffset = SVEStackSize;
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return StackOffset::getFixed(FPOffset) + ScalableOffset;
}
if (RegInfo->hasBasePointer(MF))
FrameReg = RegInfo->getBaseRegister();
else {
assert(!MFI.hasVarSizedObjects() &&
"Can't use SP when we have var sized objects.");
FrameReg = AArch64::SP;
if (canUseRedZone(MF))
Offset -= AFI->getLocalStackSize();
}
return StackOffset::getFixed(Offset) + ScalableOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
return getKillRegState(!IsLiveIn);
}
static bool produceCompactUnwindFrame(MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AttributeList Attrs = MF.getFunction().getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
bool NeedsWinCFI, bool IsFirst) {
if (Reg2 == AArch64::FP)
return true;
if (!NeedsWinCFI)
return false;
if (Reg2 == Reg1 + 1)
return false;
if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
(Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
return false;
return true;
}
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
bool UsesWinAAPCS, bool NeedsWinCFI,
bool NeedsFrameRecord, bool IsFirst) {
if (UsesWinAAPCS)
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst);
if (NeedsFrameRecord)
return Reg2 == AArch64::LR;
return false;
}
namespace {
struct RegPairInfo {
unsigned Reg1 = AArch64::NoRegister;
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
RegPairInfo() = default;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
unsigned getScale() const {
switch (Type) {
case PPR:
return 2;
case GPR:
case FPR64:
return 8;
case ZPR:
case FPR128:
return 16;
}
llvm_unreachable("Unsupported type");
}
bool isScalable() const { return Type == PPR || Type == ZPR; }
};
}
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
bool NeedsFrameRecord) {
if (CSI.empty())
return;
bool IsWindows = isTargetWindows(MF);
bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
unsigned Count = CSI.size();
(void)CC;
assert((!produceCompactUnwindFrame(MF) ||
CC == CallingConv::PreserveMost || CC == CallingConv::CXX_FAST_TLS ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
int StackFillDir = -1;
int RegInc = 1;
unsigned FirstReg = 0;
if (NeedsWinCFI) {
ByteOffset = 0;
StackFillDir = 1;
RegInc = -1;
FirstReg = Count - 1;
}
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
for (unsigned i = FirstReg; i < Count; i += RegInc) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
if (AArch64::GPR64RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::GPR;
else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR64;
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR128;
else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::ZPR;
else if (AArch64::PPRRegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::PPR;
else
llvm_unreachable("Unsupported register class.");
if (unsigned(i + RegInc) < Count) {
Register NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
if (AArch64::FPR128RegClass.contains(NextReg))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::PPR:
case RegPairInfo::ZPR:
break;
}
}
assert((!RPI.isPaired() ||
(CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
"Out of order callee saved regs!");
assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
RPI.Reg1 == AArch64::LR) &&
"FrameRecord must be allocated together with LR");
assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
RPI.Reg2 == AArch64::LR) &&
"FrameRecord must be allocated together with LR");
assert((!produceCompactUnwindFrame(MF) ||
CC == CallingConv::PreserveMost || CC == CallingConv::CXX_FAST_TLS ||
(RPI.isPaired() &&
((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
RPI.Reg1 + 1 == RPI.Reg2))) &&
"Callee-save registers not saved as adjacent register pair!");
RPI.FrameIdx = CSI[i].getFrameIdx();
if (NeedsWinCFI &&
RPI.isPaired()) RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
int Scale = RPI.getScale();
int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(OffsetPre % Scale == 0);
if (RPI.isScalable())
ScalableByteOffset += StackFillDir * Scale;
else
ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
RPI.Reg2 == AArch64::FP)
ByteOffset += StackFillDir * 8;
assert(!(RPI.isScalable() && RPI.isPaired()) &&
"Paired spill/fill instructions don't exist for SVE vectors");
if (NeedGapToAlignStack && !NeedsWinCFI &&
!RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
!RPI.isPaired() && ByteOffset % 16 != 0) {
ByteOffset += 8 * StackFillDir;
assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
NeedGapToAlignStack = false;
}
int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(OffsetPost % Scale == 0);
int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
RPI.Reg2 == AArch64::FP)
Offset += 8;
RPI.Offset = Offset / Scale;
assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
RPI.Reg2 == AArch64::FP) ||
(IsWindows && RPI.Reg1 == AArch64::FP &&
RPI.Reg2 == AArch64::LR)))
AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
RegPairs.push_back(RPI);
if (RPI.isPaired())
i += RegInc;
}
if (NeedsWinCFI) {
if (AFI->hasCalleeSaveStackFreeSpace())
MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
std::reverse(RegPairs.begin(), RegPairs.end());
}
}
bool AArch64FrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
bool NeedsWinCFI = needsWinCFI(MF);
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (homogeneousPrologEpilog(MF)) {
auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
.setMIFlag(MachineInstr::FrameSetup);
for (auto &RPI : RegPairs) {
MIB.addReg(RPI.Reg1);
MIB.addReg(RPI.Reg2);
if (!MRI.isReserved(RPI.Reg1))
MBB.addLiveIn(RPI.Reg1);
if (!MRI.isReserved(RPI.Reg2))
MBB.addLiveIn(RPI.Reg2);
}
return true;
}
for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
unsigned StrOpc;
unsigned Size;
Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR64:
StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR128:
StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::ZPR:
StrOpc = AArch64::STR_ZXI;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::PPR:
StrOpc = AArch64::STR_PXI;
Size = 2;
Alignment = Align(2);
break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
"Windows unwdinding requires a consecutive (FP,LR) pair");
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
if (NeedsWinCFI && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
MBB.addLiveIn(Reg1);
if (RPI.isPaired()) {
if (!MRI.isReserved(Reg2))
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
MachineMemOperand::MOStore, Size, Alignment));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
.addImm(RPI.Offset) .setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOStore, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
MachineFrameInfo &MFI = MF.getFrameInfo();
if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector);
}
return true;
}
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
bool NeedsWinCFI = needsWinCFI(MF);
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
auto EmitMI = [&](const RegPairInfo &RPI) -> MachineBasicBlock::iterator {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
unsigned LdrOpc;
unsigned Size;
Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR64:
LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR128:
LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::ZPR:
LdrOpc = AArch64::LDR_ZXI;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::PPR:
LdrOpc = AArch64::LDR_PXI;
Size = 2;
Alignment = Align(2);
break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
if (NeedsWinCFI && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
MachineMemOperand::MOLoad, Size, Alignment));
}
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
.addImm(RPI.Offset) .setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
return MIB->getIterator();
};
for (const RegPairInfo &RPI : reverse(RegPairs))
if (RPI.isScalable())
EmitMI(RPI);
if (homogeneousPrologEpilog(MF, &MBB)) {
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
.setMIFlag(MachineInstr::FrameDestroy);
for (auto &RPI : RegPairs) {
MIB.addReg(RPI.Reg1, RegState::Define);
MIB.addReg(RPI.Reg2, RegState::Define);
}
return true;
}
if (ReverseCSRRestoreSeq) {
MachineBasicBlock::iterator First = MBB.end();
for (const RegPairInfo &RPI : reverse(RegPairs)) {
if (RPI.isScalable())
continue;
MachineBasicBlock::iterator It = EmitMI(RPI);
if (First == MBB.end())
First = It;
}
if (First != MBB.end())
MBB.splice(MBBI, &MBB, First);
} else {
for (const RegPairInfo &RPI : RegPairs) {
if (RPI.isScalable())
continue;
(void)EmitMI(RPI);
}
}
return true;
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
MachineFrameInfo &MFI = MF.getFrameInfo();
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;
unsigned ExtraCSSpill = 0;
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
if (Reg == BasePointerReg)
SavedRegs.set(Reg);
bool RegUsed = SavedRegs.test(Reg);
unsigned PairedReg = AArch64::NoRegister;
if (AArch64::GPR64RegClass.contains(Reg) ||
AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg))
PairedReg = CSRegs[i ^ 1];
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
!RegInfo->isReservedReg(MF, Reg)) {
UnspilledCSGPR = Reg;
UnspilledCSGPRPaired = PairedReg;
}
continue;
}
if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
!SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
ExtraCSSpill = PairedReg;
}
}
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
!Subtarget.isTargetWindows()) {
SavedRegs.set(AArch64::X18);
}
unsigned CSStackSize = 0;
unsigned SVECSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
if (AArch64::PPRRegClass.contains(Reg) ||
AArch64::ZPRRegClass.contains(Reg))
SVECSStackSize += RegSize;
else
CSStackSize += RegSize;
}
unsigned NumSavedRegs = SavedRegs.count();
uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
if (hasFP(MF) ||
windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
SavedRegs.set(AArch64::FP);
SavedRegs.set(AArch64::LR);
}
LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
for (unsigned Reg
: SavedRegs.set_bits()) dbgs()
<< ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";);
int64_t SVEStackSize =
alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
bool BigStack = SVEStackSize ||
(EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
if (BigStack) {
if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
<< " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
if (producePairRegisters(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPR;
}
if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass &RC = AArch64::GPR64RegClass;
unsigned Size = TRI->getSpillSize(RC);
Align Alignment = TRI->getSpillAlign(RC);
int FI = MFI.CreateStackObject(Size, Alignment, false);
RS->addScavengingFrameIndex(FI);
LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
}
}
CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
if (hasFP(MF) && AFI->hasSwiftAsyncContext())
CSStackSize += 8;
uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
<< EstimatedStackSize + AlignedCSStackSize
<< " bytes.\n");
assert((!MFI.isCalleeSavedInfoValid() ||
AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
"Should not invalidate callee saved info");
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *RegInfo,
std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
unsigned &MaxCSFrameIndex) const {
bool NeedsWinCFI = needsWinCFI(MF);
if (NeedsWinCFI)
std::reverse(CSI.begin(), CSI.end());
if (CSI.empty())
return true;
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool UsesWinAAPCS = isTargetWindows(MF);
if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
}
for (auto &CS : CSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
unsigned Size = RegInfo->getSpillSize(*RC);
Align Alignment(RegInfo->getSpillAlign(*RC));
int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
CS.setFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
Reg == AArch64::FP) {
FrameIdx = MFI.CreateStackObject(8, Alignment, true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
}
}
return true;
}
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasCalleeSaveStackFreeSpace();
}
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
int &Min, int &Max) {
Min = std::numeric_limits<int>::max();
Max = std::numeric_limits<int>::min();
if (!MFI.isCalleeSavedInfoValid())
return false;
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (auto &CS : CSI) {
if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
AArch64::PPRRegClass.contains(CS.getReg())) {
assert((Max == std::numeric_limits<int>::min() ||
Max + 1 == CS.getFrameIdx()) &&
"SVE CalleeSaves are not consecutive");
Min = std::min(Min, CS.getFrameIdx());
Max = std::max(Max, CS.getFrameIdx());
}
}
return Min != std::numeric_limits<int>::max();
}
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
int &MinCSFrameIndex,
int &MaxCSFrameIndex,
bool AssignOffsets) {
#ifndef NDEBUG
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
auto Assign = [&MFI](int FI, int64_t Offset) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
MFI.setObjectOffset(FI, Offset);
};
int64_t Offset = 0;
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
Offset += MFI.getObjectSize(I);
Offset = alignTo(Offset, MFI.getObjectAlign(I));
if (AssignOffsets)
Assign(I, -Offset);
}
}
Offset = alignTo(Offset, Align(16U));
SmallVector<int, 8> ObjectsToAllocate;
int StackProtectorFI = -1;
if (MFI.hasStackProtectorIndex()) {
StackProtectorFI = MFI.getStackProtectorIndex();
if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
ObjectsToAllocate.push_back(StackProtectorFI);
}
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
unsigned StackID = MFI.getStackID(I);
if (StackID != TargetStackID::ScalableVector)
continue;
if (I == StackProtectorFI)
continue;
if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
continue;
if (MFI.isDeadObjectIndex(I))
continue;
ObjectsToAllocate.push_back(I);
}
for (unsigned FI : ObjectsToAllocate) {
Align Alignment = MFI.getObjectAlign(FI);
if (Alignment > Align(16))
report_fatal_error(
"Alignment of scalable vectors > 16 bytes is not yet supported");
Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
if (AssignOffsets)
Assign(FI, -Offset);
}
return Offset;
}
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
MachineFrameInfo &MFI) const {
int MinCSFrameIndex, MaxCSFrameIndex;
return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
}
int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
true);
}
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
int MinCSFrameIndex, MaxCSFrameIndex;
int64_t SVEStackSize =
assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
if (!MF.hasEHFunclets())
return;
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
MachineBasicBlock &MBB = MF.front();
auto MBBI = MBB.begin();
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
++MBBI;
int64_t FixedObject =
getFixedObjectSize(MF, AFI, true, false);
int UnwindHelpFI = MFI.CreateFixedObject( 8,
-FixedObject,
false);
EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
DebugLoc DL;
RS->enterBasicBlockEnd(MBB);
RS->backward(std::prev(MBBI));
Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
assert(DstReg && "There must be a free register after frame setup");
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
.addReg(DstReg, getKillRegState(true))
.addFrameIndex(UnwindHelpFI)
.addImm(0);
}
namespace {
struct TagStoreInstr {
MachineInstr *MI;
int64_t Offset, Size;
explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
: MI(MI), Offset(Offset), Size(Size) {}
};
class TagStoreEdit {
MachineFunction *MF;
MachineBasicBlock *MBB;
MachineRegisterInfo *MRI;
SmallVector<TagStoreInstr, 8> TagStores;
SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
Register FrameReg;
StackOffset FrameRegOffset;
int64_t Size;
Optional<int64_t> FrameRegUpdate;
unsigned FrameRegUpdateFlags;
bool ZeroData;
DebugLoc DL;
void emitUnrolled(MachineBasicBlock::iterator InsertI);
void emitLoop(MachineBasicBlock::iterator InsertI);
public:
TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
: MBB(MBB), ZeroData(ZeroData) {
MF = MBB->getParent();
MRI = &MF->getRegInfo();
}
void addInstruction(TagStoreInstr I) {
assert((TagStores.empty() ||
TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
"Non-adjacent tag store instructions.");
TagStores.push_back(I);
}
void clear() { TagStores.clear(); }
void emitCode(MachineBasicBlock::iterator &InsertI,
const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
};
void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
const int64_t kMinOffset = -256 * 16;
const int64_t kMaxOffset = 255 * 16;
Register BaseReg = FrameReg;
int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
if (BaseRegOffsetBytes < kMinOffset ||
BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
StackOffset::getFixed(BaseRegOffsetBytes), TII);
BaseReg = ScratchReg;
BaseRegOffsetBytes = 0;
}
MachineInstr *LastI = nullptr;
while (Size) {
int64_t InstrSize = (Size > 16) ? 32 : 16;
unsigned Opcode =
InstrSize == 16
? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
: (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
.addReg(AArch64::SP)
.addReg(BaseReg)
.addImm(BaseRegOffsetBytes / 16)
.setMemRefs(CombinedMemRefs);
if (BaseRegOffsetBytes == 0)
LastI = I;
BaseRegOffsetBytes += InstrSize;
Size -= InstrSize;
}
if (LastI)
MBB->splice(InsertI, MBB, LastI);
}
void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
Register BaseReg = FrameRegUpdate
? FrameReg
: MRI->createVirtualRegister(&AArch64::GPR64RegClass);
Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
int64_t LoopSize = Size;
if (FrameRegUpdate && *FrameRegUpdate)
LoopSize -= LoopSize % 32;
MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
TII->get(ZeroData ? AArch64::STZGloop_wback
: AArch64::STGloop_wback))
.addDef(SizeReg)
.addDef(BaseReg)
.addImm(LoopSize)
.addReg(BaseReg)
.setMemRefs(CombinedMemRefs);
if (FrameRegUpdate)
LoopI->setFlags(FrameRegUpdateFlags);
int64_t ExtraBaseRegUpdate =
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
if (LoopSize < Size) {
assert(FrameRegUpdate);
assert(Size - LoopSize == 16);
BuildMI(*MBB, InsertI, DL,
TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
.addDef(BaseReg)
.addReg(BaseReg)
.addReg(BaseReg)
.addImm(1 + ExtraBaseRegUpdate / 16)
.setMemRefs(CombinedMemRefs)
.setMIFlags(FrameRegUpdateFlags);
} else if (ExtraBaseRegUpdate) {
BuildMI(
*MBB, InsertI, DL,
TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
.addDef(BaseReg)
.addReg(BaseReg)
.addImm(std::abs(ExtraBaseRegUpdate))
.addImm(0)
.setMIFlags(FrameRegUpdateFlags);
}
}
bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
int64_t Size, int64_t *TotalOffset) {
MachineInstr &MI = *II;
if ((MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::SUBXri) &&
MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
int64_t Offset = MI.getOperand(2).getImm() << Shift;
if (MI.getOpcode() == AArch64::SUBXri)
Offset = -Offset;
int64_t AbsPostOffset = std::abs(Offset - Size);
const int64_t kMaxOffset =
0xFFF; if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
*TotalOffset = Offset;
return true;
}
}
return false;
}
void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
SmallVectorImpl<MachineMemOperand *> &MemRefs) {
MemRefs.clear();
for (auto &TS : TSE) {
MachineInstr *MI = TS.MI;
if (MI->memoperands_empty()) {
MemRefs.clear();
return;
}
MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
}
}
void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
const AArch64FrameLowering *TFI,
bool TryMergeSPUpdate) {
if (TagStores.empty())
return;
TagStoreInstr &FirstTagStore = TagStores[0];
TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
DL = TagStores[0].MI->getDebugLoc();
Register Reg;
FrameRegOffset = TFI->resolveFrameOffsetReference(
*MF, FirstTagStore.Offset, false , false , Reg,
false, true);
FrameReg = Reg;
FrameRegUpdate = None;
mergeMemRefs(TagStores, CombinedMemRefs);
LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n";
for (const auto &Instr
: TagStores) { dbgs() << " " << *Instr.MI; });
const int kSetTagLoopThreshold = 176;
if (Size < kSetTagLoopThreshold) {
if (TagStores.size() < 2)
return;
emitUnrolled(InsertI);
} else {
MachineInstr *UpdateInstr = nullptr;
int64_t TotalOffset = 0;
if (TryMergeSPUpdate) {
if (InsertI != MBB->end() &&
canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
&TotalOffset)) {
UpdateInstr = &*InsertI++;
LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
<< *UpdateInstr);
}
}
if (!UpdateInstr && TagStores.size() < 2)
return;
if (UpdateInstr) {
FrameRegUpdate = TotalOffset;
FrameRegUpdateFlags = UpdateInstr->getFlags();
}
emitLoop(InsertI);
if (UpdateInstr)
UpdateInstr->eraseFromParent();
}
for (auto &TS : TagStores)
TS.MI->eraseFromParent();
}
bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
int64_t &Size, bool &ZeroData) {
MachineFunction &MF = *MI.getParent()->getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Opcode = MI.getOpcode();
ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
Opcode == AArch64::STZ2GOffset);
if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
return false;
if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
return false;
Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
Size = MI.getOperand(2).getImm();
return true;
}
if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
Size = 16;
else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)
Size = 32;
else
return false;
if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
return false;
Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
16 * MI.getOperand(2).getImm();
return true;
}
MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
const AArch64FrameLowering *TFI,
RegScavenger *RS) {
bool FirstZeroData;
int64_t Size, Offset;
MachineInstr &MI = *II;
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator NextI = ++II;
if (&MI == &MBB->instr_back())
return II;
if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
return II;
SmallVector<TagStoreInstr, 4> Instrs;
Instrs.emplace_back(&MI, Offset, Size);
constexpr int kScanLimit = 10;
int Count = 0;
for (MachineBasicBlock::iterator E = MBB->end();
NextI != E && Count < kScanLimit; ++NextI) {
MachineInstr &MI = *NextI;
bool ZeroData;
int64_t Size, Offset;
if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
if (ZeroData != FirstZeroData)
break;
Instrs.emplace_back(&MI, Offset, Size);
continue;
}
if (!MI.isTransient())
++Count;
if (MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy))
break;
if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
break;
}
MachineBasicBlock::iterator InsertI = Instrs.back().MI;
InsertI++;
llvm::stable_sort(Instrs,
[](const TagStoreInstr &Left, const TagStoreInstr &Right) {
return Left.Offset < Right.Offset;
});
int64_t CurOffset = Instrs[0].Offset;
for (auto &Instr : Instrs) {
if (CurOffset > Instr.Offset)
return NextI;
CurOffset = Instr.Offset + Instr.Size;
}
TagStoreEdit TSE(MBB, FirstZeroData);
Optional<int64_t> EndOffset;
for (auto &Instr : Instrs) {
if (EndOffset && *EndOffset != Instr.Offset) {
TSE.emitCode(InsertI, TFI, false);
TSE.clear();
}
TSE.addInstruction(Instr);
EndOffset = Instr.Offset + Instr.Size;
}
TSE.emitCode(InsertI, TFI,
!MBB->getParent()
->getInfo<AArch64FunctionInfo>()
->needsAsyncDwarfUnwindInfo());
return InsertI;
}
}
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const {
if (StackTaggingMergeSetTag)
for (auto &BB : MF)
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
II = tryMergeAdjacentSTG(II, this, RS);
}
StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
const MachineFunction &MF, int FI, Register &FrameReg,
bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (IgnoreSPUpdates) {
LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
<< MFI.getObjectOffset(FI) << "\n");
FrameReg = AArch64::SP;
return StackOffset::getFixed(MFI.getObjectOffset(FI));
}
if (MFI.hasVarSizedObjects() ||
MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
return getFrameIndexReference(MF, FI, FrameReg);
FrameReg = AArch64::SP;
return getStackOffset(MF, MFI.getObjectOffset(FI));
}
unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
const MachineFunction &MF) const {
return 0;
}
unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
const MachineFunction &MF) const {
unsigned CSSize =
MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
getStackAlign());
}
namespace {
struct FrameObject {
bool IsValid = false;
int ObjectIndex = 0;
int GroupIndex = -1;
bool ObjectFirst = false;
bool GroupFirst = false;
};
class GroupBuilder {
SmallVector<int, 8> CurrentMembers;
int NextGroupIndex = 0;
std::vector<FrameObject> &Objects;
public:
GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
void AddMember(int Index) { CurrentMembers.push_back(Index); }
void EndCurrentGroup() {
if (CurrentMembers.size() > 1) {
LLVM_DEBUG(dbgs() << "group:");
for (int Index : CurrentMembers) {
Objects[Index].GroupIndex = NextGroupIndex;
LLVM_DEBUG(dbgs() << " " << Index);
}
LLVM_DEBUG(dbgs() << "\n");
NextGroupIndex++;
}
CurrentMembers.clear();
}
};
bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
A.ObjectIndex) <
std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
B.ObjectIndex);
}
}
void AArch64FrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
if (!OrderFrameObjects || ObjectsToAllocate.empty())
return;
const MachineFrameInfo &MFI = MF.getFrameInfo();
std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
for (auto &Obj : ObjectsToAllocate) {
FrameObjects[Obj].IsValid = true;
FrameObjects[Obj].ObjectIndex = Obj;
}
GroupBuilder GB(FrameObjects);
for (auto &MBB : MF) {
for (auto &MI : MBB) {
if (MI.isDebugInstr())
continue;
int OpIndex;
switch (MI.getOpcode()) {
case AArch64::STGloop:
case AArch64::STZGloop:
OpIndex = 3;
break;
case AArch64::STGOffset:
case AArch64::STZGOffset:
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
OpIndex = 1;
break;
default:
OpIndex = -1;
}
int TaggedFI = -1;
if (OpIndex >= 0) {
const MachineOperand &MO = MI.getOperand(OpIndex);
if (MO.isFI()) {
int FI = MO.getIndex();
if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
FrameObjects[FI].IsValid)
TaggedFI = FI;
}
}
if (TaggedFI >= 0)
GB.AddMember(TaggedFI);
else
GB.EndCurrentGroup();
}
GB.EndCurrentGroup();
}
const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
Optional<int> TBPI = AFI.getTaggedBasePointerIndex();
if (TBPI) {
FrameObjects[*TBPI].ObjectFirst = true;
FrameObjects[*TBPI].GroupFirst = true;
int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
if (FirstGroupIndex >= 0)
for (FrameObject &Object : FrameObjects)
if (Object.GroupIndex == FirstGroupIndex)
Object.GroupFirst = true;
}
llvm::stable_sort(FrameObjects, FrameObjectCompare);
int i = 0;
for (auto &Obj : FrameObjects) {
if (!Obj.IsValid)
break;
ObjectsToAllocate[i++] = Obj.ObjectIndex;
}
LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj
: FrameObjects) {
if (!Obj.IsValid)
break;
dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
if (Obj.ObjectFirst)
dbgs() << ", first";
if (Obj.GroupFirst)
dbgs() << ", group-first";
dbgs() << "\n";
});
}