37#define DEBUG_TYPE "machine-combiner"
39STATISTIC(NumInstCombined,
"Number of machineinst combined");
43 cl::desc(
"Incremental depth computation will be used for basic "
44 "blocks with more instructions."),
cl::init(500));
47 cl::desc(
"Dump all substituted intrs"),
50#ifdef EXPENSIVE_CHECKS
52 "machine-combiner-verify-pattern-order",
cl::Hidden,
54 "Verify that the generated patterns are ordered by increasing latency"),
58 "machine-combiner-verify-pattern-order",
cl::Hidden,
60 "Verify that the generated patterns are ordered by increasing latency"),
107 unsigned Pattern,
bool SlackIsAccurate);
118 std::pair<unsigned, unsigned>
130char MachineCombiner::ID = 0;
134 "Machine InstCombiner",
false,
false)
140void MachineCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
141 AU.setPreservesCFG();
157 DefInstr =
MRI->getUniqueVRegDef(MO.
getReg());
164 return MI->isTransient();
170 if (!
MI->isFullCopy()) {
172 if (
MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
175 auto SrcSub =
MI->getOperand(1).getSubReg();
176 auto SrcRC =
MRI->getRegClass(Src);
177 auto DstRC =
MRI->getRegClass(Dst);
178 return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) !=
nullptr;
181 if (Src.isPhysical() && Dst.isPhysical())
184 if (Src.isVirtual() && Dst.isVirtual()) {
185 auto SrcRC =
MRI->getRegClass(Src);
186 auto DstRC =
MRI->getRegClass(Dst);
187 return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
194 auto DstRC =
MRI->getRegClass(Dst);
195 return DstRC->contains(Src);
215 for (
auto *InstrPtr : InsInstrs) {
221 unsigned DepthOp = 0;
222 unsigned LatencyOp = 0;
225 if (II != InstrIdxForVirtReg.
end()) {
227 assert(II->second < InstrDepth.
size() &&
"Bad Index");
230 "There must be a definition for a new virtual register");
231 DepthOp = InstrDepth[II->second];
233 int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.
getReg());
234 LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
238 if (DefInstr && (
TII->getMachineCombinerTraceStrategy() !=
239 MachineTraceStrategy::TS_Local ||
242 if (!isTransientMI(DefInstr))
243 LatencyOp = TSchedModel.computeOperandLatency(
245 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.
getReg()));
248 IDepth = std::max(IDepth, DepthOp + LatencyOp);
252 unsigned NewRootIdx = InsInstrs.size() - 1;
253 return InstrDepth[NewRootIdx];
268 unsigned NewRootLatency = 0;
277 if (RI ==
MRI->reg_end())
280 unsigned LatencyOp = 0;
282 LatencyOp = TSchedModel.computeOperandLatency(
286 LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
288 NewRootLatency = std::max(NewRootLatency, LatencyOp);
290 return NewRootLatency;
297 case MachineCombinerPattern::REASSOC_AX_BY:
298 case MachineCombinerPattern::REASSOC_AX_YB:
299 case MachineCombinerPattern::REASSOC_XA_BY:
300 case MachineCombinerPattern::REASSOC_XA_YB:
301 return CombinerObjective::MustReduceDepth;
311std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
315 assert(!InsInstrs.
empty() &&
"Only support sequences that insert instrs.");
316 unsigned NewRootLatency = 0;
319 for (
unsigned i = 0; i < InsInstrs.
size() - 1; i++)
320 NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
323 unsigned RootLatency = 0;
324 for (
auto *
I : DelInstrs)
325 RootLatency += TSchedModel.computeInstrLatency(
I);
327 return {NewRootLatency, RootLatency};
330bool MachineCombiner::reduceRegisterPressure(
346bool MachineCombiner::improvesCriticalPathLen(
352 bool SlackIsAccurate) {
354 unsigned NewRootDepth =
355 getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *
MBB);
358 LLVM_DEBUG(
dbgs() <<
" Dependence data for " << *Root <<
"\tNewRootDepth: "
359 << NewRootDepth <<
"\tRootDepth: " << RootDepth);
366 if (getCombinerObjective(
Pattern) == CombinerObjective::MustReduceDepth) {
369 ?
dbgs() <<
"\t and it does it\n"
370 :
dbgs() <<
"\t but it does NOT do it\n");
371 return NewRootDepth < RootDepth;
379 unsigned NewRootLatency, RootLatency;
380 if (
TII->accumulateInstrSeqToRootLatency(*Root)) {
381 std::tie(NewRootLatency, RootLatency) =
382 getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
384 NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.
back());
385 RootLatency = TSchedModel.computeInstrLatency(Root);
389 unsigned NewCycleCount = NewRootDepth + NewRootLatency;
390 unsigned OldCycleCount =
391 RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
393 <<
"\tRootLatency: " << RootLatency <<
"\n\tRootSlack: "
394 << RootSlack <<
" SlackIsAccurate=" << SlackIsAccurate
395 <<
"\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
396 <<
"\n\tRootDepth + RootLatency + RootSlack = "
399 ?
dbgs() <<
"\n\t It IMPROVES PathLen because"
400 :
dbgs() <<
"\n\t It DOES NOT improve PathLen because");
402 <<
", OldCycleCount = " << OldCycleCount <<
"\n");
404 return NewCycleCount <= OldCycleCount;
408void MachineCombiner::instr2instrSC(
411 for (
auto *InstrPtr : Instrs) {
412 unsigned Opc = InstrPtr->getOpcode();
413 unsigned Idx =
TII->get(Opc).getSchedClass();
420bool MachineCombiner::preservesResourceLen(
424 if (!TSchedModel.hasInstrSchedModel())
430 SmallVector <const MachineBasicBlock *, 1> MBBarr;
438 instr2instrSC(InsInstrs, InsInstrsSC);
439 instr2instrSC(DelInstrs, DelInstrsSC);
445 unsigned ResLenAfterCombine =
449 << ResLenBeforeCombine
450 <<
" and after: " << ResLenAfterCombine <<
"\n";);
452 ResLenAfterCombine <=
453 ResLenBeforeCombine +
TII->getExtendResourceLenLimit()
454 ?
dbgs() <<
"\t\t As result it IMPROVES/PRESERVES Resource Length\n"
455 :
dbgs() <<
"\t\t As result it DOES NOT improve/preserve Resource "
458 return ResLenAfterCombine <=
459 ResLenBeforeCombine +
TII->getExtendResourceLenLimit();
482 bool IncrementalUpdate) {
492 for (
auto *InstrPtr : InsInstrs)
495 for (
auto *InstrPtr : DelInstrs) {
496 InstrPtr->eraseFromParent();
498 for (
auto *
I = RegUnits.
begin();
I != RegUnits.
end();) {
499 if (
I->MI == InstrPtr)
506 if (IncrementalUpdate)
507 for (
auto *InstrPtr : InsInstrs)
520 long PrevLatencyDiff = std::numeric_limits<long>::max();
521 (void)PrevLatencyDiff;
522 for (
auto P : Patterns) {
526 TII->genAlternativeCodeSequence(Root,
P, InsInstrs, DelInstrs,
531 if (InsInstrs.
empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
534 unsigned NewRootLatency, RootLatency;
535 std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
536 Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(
MBB));
537 long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
538 assert(CurrentLatencyDiff <= PrevLatencyDiff &&
539 "Current pattern is better than previous pattern.");
540 PrevLatencyDiff = CurrentLatencyDiff;
552 bool Changed =
false;
555 bool IncrementalUpdate =
false;
557 decltype(BlockIter) LastUpdate;
561 TraceEnsemble = Traces->getEnsemble(
TII->getMachineCombinerTraceStrategy());
568 bool DoRegPressureReduce =
569 TII->shouldReduceRegisterPressure(
MBB, &RegClassInfo);
571 while (BlockIter !=
MBB->
end()) {
572 auto &
MI = *BlockIter++;
601 if (!
TII->getMachineCombinerPatterns(
MI, Patterns, DoRegPressureReduce))
605 verifyPatternOrder(
MBB,
MI, Patterns);
607 for (
const auto P : Patterns) {
611 TII->genAlternativeCodeSequence(
MI,
P, InsInstrs, DelInstrs,
616 if (InsInstrs.
empty())
620 dbgs() <<
"\tFor the Pattern (" << (int)
P
621 <<
") these instructions could be removed\n";
622 for (
auto const *InstrPtr : DelInstrs)
623 InstrPtr->print(
dbgs(),
false,
false,
625 dbgs() <<
"\tThese instructions could replace the removed ones\n";
626 for (
auto const *InstrPtr : InsInstrs)
627 InstrPtr->print(
dbgs(),
false,
false,
631 if (IncrementalUpdate && LastUpdate != BlockIter) {
633 TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits);
634 LastUpdate = BlockIter;
637 if (DoRegPressureReduce &&
638 getCombinerObjective(
P) ==
639 CombinerObjective::MustReduceRegisterPressure) {
642 IncrementalUpdate =
true;
643 LastUpdate = BlockIter;
645 if (reduceRegisterPressure(
MI,
MBB, InsInstrs, DelInstrs,
P)) {
648 RegUnits,
TII,
P, IncrementalUpdate);
658 if (
ML &&
TII->isThroughputPattern(
P)) {
659 LLVM_DEBUG(
dbgs() <<
"\t Replacing due to throughput pattern in loop\n");
661 RegUnits,
TII,
P, IncrementalUpdate);
665 }
else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
667 << InsInstrs.size() <<
" < "
668 << DelInstrs.size() <<
")\n");
670 RegUnits,
TII,
P, IncrementalUpdate);
681 Traces->verifyAnalysis();
682 if (improvesCriticalPathLen(
MBB, &
MI, BlockTrace, InsInstrs, DelInstrs,
683 InstrIdxForVirtReg,
P,
684 !IncrementalUpdate) &&
685 preservesResourceLen(
MBB, BlockTrace, InsInstrs, DelInstrs)) {
688 IncrementalUpdate =
true;
689 LastUpdate = BlockIter;
693 RegUnits,
TII,
P, IncrementalUpdate);
702 for (
auto *InstrPtr : InsInstrs)
705 InstrIdxForVirtReg.
clear();
709 if (Changed && IncrementalUpdate)
710 Traces->invalidate(
MBB);
716 TII = STI->getInstrInfo();
717 TRI = STI->getRegisterInfo();
718 SchedModel = STI->getSchedModel();
719 TSchedModel.init(STI);
721 MLI = &getAnalysis<MachineLoopInfo>();
722 Traces = &getAnalysis<MachineTraceMetrics>();
723 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
724 MBFI = (PSI && PSI->hasProfileSummary()) ?
725 &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
727 TraceEnsemble =
nullptr;
729 RegClassInfo.runOnMachineFunction(MF);
732 if (!
TII->useMachineCombiner()) {
735 <<
" Skipping pass: Target does not support machine combiner\n");
739 bool Changed =
false;
743 Changed |= combineInstructions(&
MBB);
unsigned const MachineRegisterInfo * MRI
COFF::MachineTypes Machine
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
const HexagonInstrInfo * TII
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*–===//
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, MachineTraceMetrics::Ensemble *TraceEnsemble, SparseSet< LiveRegUnit > &RegUnits, const TargetInstrInfo *TII, unsigned Pattern, bool IncrementalUpdate)
Inserts InsInstrs and deletes DelInstrs.
static cl::opt< bool > VerifyPatternOrder("machine-combiner-verify-pattern-order", cl::Hidden, cl::desc("Verify that the generated patterns are ordered by increasing latency"), cl::init(false))
static cl::opt< unsigned > inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500))
static cl::opt< bool > dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, cl::desc("Dump all substituted intrs"), cl::init(false))
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
iterator find(const_arg_type_t< KeyT > Val)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
The core instruction combiner logic.
This is an alternative analysis pass to MachineBlockFrequencyInfo.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
const MachineBasicBlock * getParent() const
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=std::nullopt, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=std::nullopt, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=std::nullopt) const
Return the resource length of the trace.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
const_iterator begin() const
const_iterator end() const
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
void initializeMachineCombinerPass(PassRegistry &)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...