blob: b50af38683ed1b60dc5a43ea7b3aac5ad24e1af1 [file] [log] [blame]
//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// \file
// \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
// there is a long branch. Branch size at this point is difficult to track since
// we have no idea what spills will be inserted later on. We just assume 8 bytes
// per instruction to compute approximations without computing the actual
// instruction size to see if we're in the neighborhood of the maximum branch
// distance threshold. Tuning of what is considered "long" is handled through
// the amdgpu-long-branch-factor cl argument which sets LongBranchFactor.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
namespace {
// Scaling factor applied to the estimated branch distance before the
// in-range check in runOnMachineFunction. 0 zeroes every estimated distance,
// so the registers are never reserved; larger values make reservation more
// likely.
static cl::opt<double> LongBranchFactor(
    "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden,
    cl::desc("Factor to apply to what qualifies as a long branch "
             "to reserve a pair of scalar registers. If this value "
             "is 0 the long branch registers are never reserved. As this "
             "value grows the greater chance the branch distance will fall "
             "within the threshold and the registers will be marked to be "
             "reserved. We lean towards always reserving a register for "
             "long jumps"));
// Pre-RA pass: estimates per-block offsets and, if any unconditional branch
// target looks out of range, records a reserved SGPR pair in
// SIMachineFunctionInfo for the post-RA long-branch expansion to use.
class GCNPreRALongBranchReg : public MachineFunctionPass {
  // Per-basic-block layout estimate used to approximate branch distances.
  struct BasicBlockInfo {
    // Offset - Distance from the beginning of the function to the beginning
    // of this basic block.
    uint64_t Offset = 0;
    // Size - Size of the basic block in bytes
    uint64_t Size = 0;
  };

  // Fills BlockInfo (indexed by MBB number) with the estimated size and
  // offset of every block in MF.
  void generateBlockInfo(MachineFunction &MF,
                         SmallVectorImpl<BasicBlockInfo> &BlockInfo);

public:
  static char ID;

  GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
    initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA Long Branch Reg";
  }

  // Only reads the MIR and stashes a register choice in the function info,
  // so every analysis is preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // End anonymous namespace.
char GCNPreRALongBranchReg::ID = 0;

INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
                "AMDGPU Pre-RA Long Branch Reg", false, false)

// Public handle the AMDGPU pass pipeline uses to identify/schedule this pass.
char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
// Estimate the byte size and start offset of every basic block in \p MF,
// writing the results into \p BlockInfo indexed by MBB number. Sizes are a
// crude approximation (8 bytes per real instruction) since actual encodings
// and later spill code are unknown this early.
void GCNPreRALongBranchReg::generateBlockInfo(
    MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
  BlockInfo.resize(MF.getNumBlockIDs());

  // Approximate the size of all basic blocks by just
  // assuming 8 bytes per instruction.
  for (const MachineBasicBlock &MBB : MF) {
    uint64_t NumInstr = 0;
    // Count all non-debug, non-meta instructions.
    for (const MachineInstr &MI : MBB) {
      // isMetaInstruction is a superset of isDebugInstr.
      if (MI.isMetaInstruction())
        continue;
      NumInstr += 1;
    }
    BlockInfo[MBB.getNumber()].Size = 8 * NumInstr;
  }

  // Guard against a function with no blocks: the offset accumulation below
  // would otherwise dereference begin() on an empty list.
  if (MF.empty())
    return;

  // Accumulate offsets in layout order: each block starts where the previous
  // one ends. The entry block keeps its zero-initialized offset.
  uint64_t PrevNum = MF.front().getNumber();
  for (MachineBasicBlock &MBB :
       make_range(std::next(MF.begin()), MF.end())) {
    uint64_t Num = MBB.getNumber();
    // Compute the offset immediately following the previous block.
    BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size;
    PrevNum = Num;
  }
}
// Scan \p MF for unconditional branches whose (scaled) estimated distance to
// the destination block falls outside the branch-offset encoding range; when
// one is found, record a reserved SGPR pair in SIMachineFunctionInfo so the
// post-RA long-branch expansion has scratch registers available.
// Returns true iff a register pair was reserved.
bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // For now, reserve the highest available SGPR pair; after RA it is shifted
  // down to a lower unused pair. findUnusedRegister yields
  // AMDGPU::NoRegister when every register is in use.
  constexpr bool ReserveHighestRegister = true;
  Register Reserved = TRI->findUnusedRegister(
      MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister);
  if (!Reserved)
    return false;

  // Approximate code size and the start offset of each basic block.
  SmallVector<BasicBlockInfo, 16> BlockInfo;
  generateBlockInfo(MF, BlockInfo);

  for (const MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
    if (Last == MBB.end())
      continue;
    if (!Last->isUnconditionalBranch())
      continue;

    const MachineBasicBlock *Dest = TII->getBranchDestBlock(*Last);
    uint64_t Dist = static_cast<uint64_t>(
        LongBranchFactor * BlockInfo[Dest->getNumber()].Offset);
    if (TII->isBranchOffsetInRange(Last->getOpcode(), Dist))
      continue;

    // Distance exceeds the threshold: assume a long branch, reserve the pair,
    // and stop scanning — one reservation covers the whole function.
    MFI->setLongBranchReservedReg(Reserved);
    return true;
  }
  return false;
}