37#define DEBUG_TYPE "machine-combiner"
39STATISTIC(NumInstCombined,
"Number of machineinst combined");
43 cl::desc(
"Incremental depth computation will be used for basic "
44 "blocks with more instructions."),
cl::init(500));
47 cl::desc(
"Dump all substituted intrs"),
50#ifdef EXPENSIVE_CHECKS
52 "machine-combiner-verify-pattern-order",
cl::Hidden,
54 "Verify that the generated patterns are ordered by increasing latency"),
58 "machine-combiner-verify-pattern-order",
cl::Hidden,
60 "Verify that the generated patterns are ordered by increasing latency"),
104 unsigned Pattern,
bool SlackIsAccurate);
115 std::pair<unsigned, unsigned>
127char MachineCombiner::ID = 0;
131 "Machine InstCombiner",
false,
false)
137void MachineCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
138 AU.setPreservesCFG();
154 DefInstr =
MRI->getUniqueVRegDef(MO.
getReg());
161 return MI->isTransient();
167 if (!
MI->isFullCopy()) {
169 if (
MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
172 auto SrcSub =
MI->getOperand(1).getSubReg();
173 auto SrcRC =
MRI->getRegClass(Src);
174 auto DstRC =
MRI->getRegClass(Dst);
175 return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) !=
nullptr;
178 if (Src.isPhysical() && Dst.isPhysical())
181 if (Src.isVirtual() && Dst.isVirtual()) {
182 auto SrcRC =
MRI->getRegClass(Src);
183 auto DstRC =
MRI->getRegClass(Dst);
184 return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
191 auto DstRC =
MRI->getRegClass(Dst);
192 return DstRC->contains(Src);
212 for (
auto *InstrPtr : InsInstrs) {
218 unsigned DepthOp = 0;
219 unsigned LatencyOp = 0;
222 if (
II != InstrIdxForVirtReg.
end()) {
227 "There must be a definition for a new virtual register");
228 DepthOp = InstrDepth[
II->second];
232 InstrPtr->findRegisterUseOperandIdx(MO.
getReg(),
nullptr);
233 LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
237 if (DefInstr && (
TII->getMachineCombinerTraceStrategy() !=
238 MachineTraceStrategy::TS_Local ||
241 if (!isTransientMI(DefInstr))
242 LatencyOp = TSchedModel.computeOperandLatency(
247 InstrPtr->findRegisterUseOperandIdx(MO.
getReg(),
251 IDepth = std::max(IDepth, DepthOp + LatencyOp);
255 unsigned NewRootIdx = InsInstrs.size() - 1;
256 return InstrDepth[NewRootIdx];
271 unsigned NewRootLatency = 0;
280 if (RI ==
MRI->reg_end())
283 unsigned LatencyOp = 0;
285 LatencyOp = TSchedModel.computeOperandLatency(
291 LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
293 NewRootLatency = std::max(NewRootLatency, LatencyOp);
295 return NewRootLatency;
302 case MachineCombinerPattern::REASSOC_AX_BY:
303 case MachineCombinerPattern::REASSOC_AX_YB:
304 case MachineCombinerPattern::REASSOC_XA_BY:
305 case MachineCombinerPattern::REASSOC_XA_YB:
306 return CombinerObjective::MustReduceDepth;
316std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
320 assert(!InsInstrs.
empty() &&
"Only support sequences that insert instrs.");
321 unsigned NewRootLatency = 0;
324 for (
unsigned i = 0; i < InsInstrs.
size() - 1; i++)
325 NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
326 NewRootLatency += getLatency(&
MI, NewRoot, BlockTrace);
328 unsigned RootLatency = 0;
329 for (
auto *
I : DelInstrs)
330 RootLatency += TSchedModel.computeInstrLatency(
I);
332 return {NewRootLatency, RootLatency};
335bool MachineCombiner::reduceRegisterPressure(
351bool MachineCombiner::improvesCriticalPathLen(
357 bool SlackIsAccurate) {
359 unsigned NewRootDepth =
360 getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *
MBB);
363 LLVM_DEBUG(
dbgs() <<
" Dependence data for " << *Root <<
"\tNewRootDepth: "
364 << NewRootDepth <<
"\tRootDepth: " << RootDepth);
371 if (getCombinerObjective(
Pattern) == CombinerObjective::MustReduceDepth) {
374 ?
dbgs() <<
"\t and it does it\n"
375 :
dbgs() <<
"\t but it does NOT do it\n");
376 return NewRootDepth < RootDepth;
384 unsigned NewRootLatency, RootLatency;
385 if (
TII->accumulateInstrSeqToRootLatency(*Root)) {
386 std::tie(NewRootLatency, RootLatency) =
387 getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
389 NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.
back());
390 RootLatency = TSchedModel.computeInstrLatency(Root);
394 unsigned NewCycleCount = NewRootDepth + NewRootLatency;
395 unsigned OldCycleCount =
396 RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
398 <<
"\tRootLatency: " << RootLatency <<
"\n\tRootSlack: "
399 << RootSlack <<
" SlackIsAccurate=" << SlackIsAccurate
400 <<
"\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
401 <<
"\n\tRootDepth + RootLatency + RootSlack = "
404 ?
dbgs() <<
"\n\t It IMPROVES PathLen because"
405 :
dbgs() <<
"\n\t It DOES NOT improve PathLen because");
407 <<
", OldCycleCount = " << OldCycleCount <<
"\n");
409 return NewCycleCount <= OldCycleCount;
413void MachineCombiner::instr2instrSC(
416 for (
auto *InstrPtr : Instrs) {
417 unsigned Opc = InstrPtr->getOpcode();
418 unsigned Idx =
TII->get(Opc).getSchedClass();
425bool MachineCombiner::preservesResourceLen(
429 if (!TSchedModel.hasInstrSchedModel())
435 SmallVector <const MachineBasicBlock *, 1> MBBarr;
443 instr2instrSC(InsInstrs, InsInstrsSC);
444 instr2instrSC(DelInstrs, DelInstrsSC);
450 unsigned ResLenAfterCombine =
454 << ResLenBeforeCombine
455 <<
" and after: " << ResLenAfterCombine <<
"\n");
457 ResLenAfterCombine <=
458 ResLenBeforeCombine +
TII->getExtendResourceLenLimit()
459 ?
dbgs() <<
"\t\t As result it IMPROVES/PRESERVES Resource Length\n"
460 :
dbgs() <<
"\t\t As result it DOES NOT improve/preserve Resource "
463 return ResLenAfterCombine <=
464 ResLenBeforeCombine +
TII->getExtendResourceLenLimit();
487 bool IncrementalUpdate) {
497 for (
auto *InstrPtr : InsInstrs)
500 for (
auto *InstrPtr : DelInstrs) {
501 InstrPtr->eraseFromParent();
503 for (
auto *
I = RegUnits.
begin();
I != RegUnits.
end();) {
504 if (
I->MI == InstrPtr)
511 if (IncrementalUpdate)
512 for (
auto *InstrPtr : InsInstrs)
525 long PrevLatencyDiff = std::numeric_limits<long>::max();
526 (void)PrevLatencyDiff;
527 for (
auto P : Patterns) {
531 TII->genAlternativeCodeSequence(Root,
P, InsInstrs, DelInstrs,
536 if (InsInstrs.
empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
539 unsigned NewRootLatency, RootLatency;
540 std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
541 Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(
MBB));
542 long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
543 assert(CurrentLatencyDiff <= PrevLatencyDiff &&
544 "Current pattern is better than previous pattern.");
545 PrevLatencyDiff = CurrentLatencyDiff;
557 bool Changed =
false;
560 bool IncrementalUpdate =
false;
562 decltype(BlockIter) LastUpdate;
566 TraceEnsemble = Traces->getEnsemble(
TII->getMachineCombinerTraceStrategy());
573 bool DoRegPressureReduce =
574 TII->shouldReduceRegisterPressure(
MBB, &RegClassInfo);
576 while (BlockIter !=
MBB->
end()) {
577 auto &
MI = *BlockIter++;
606 if (!
TII->getMachineCombinerPatterns(
MI, Patterns, DoRegPressureReduce))
610 verifyPatternOrder(
MBB,
MI, Patterns);
612 for (
const auto P : Patterns) {
616 TII->genAlternativeCodeSequence(
MI,
P, InsInstrs, DelInstrs,
621 if (InsInstrs.
empty())
625 dbgs() <<
"\tFor the Pattern (" << (int)
P
626 <<
") these instructions could be removed\n";
627 for (
auto const *InstrPtr : DelInstrs)
628 InstrPtr->print(
dbgs(),
false,
false,
630 dbgs() <<
"\tThese instructions could replace the removed ones\n";
631 for (
auto const *InstrPtr : InsInstrs)
632 InstrPtr->print(
dbgs(),
false,
false,
636 if (IncrementalUpdate && LastUpdate != BlockIter) {
638 TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits);
639 LastUpdate = BlockIter;
642 if (DoRegPressureReduce &&
643 getCombinerObjective(
P) ==
644 CombinerObjective::MustReduceRegisterPressure) {
647 IncrementalUpdate =
true;
648 LastUpdate = BlockIter;
650 if (reduceRegisterPressure(
MI,
MBB, InsInstrs, DelInstrs,
P)) {
653 RegUnits,
TII,
P, IncrementalUpdate);
663 if (
ML &&
TII->isThroughputPattern(
P)) {
664 LLVM_DEBUG(
dbgs() <<
"\t Replacing due to throughput pattern in loop\n");
666 RegUnits,
TII,
P, IncrementalUpdate);
670 }
else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
672 << InsInstrs.size() <<
" < "
673 << DelInstrs.size() <<
")\n");
675 RegUnits,
TII,
P, IncrementalUpdate);
686 Traces->verifyAnalysis();
687 if (improvesCriticalPathLen(
MBB, &
MI, BlockTrace, InsInstrs, DelInstrs,
688 InstrIdxForVirtReg,
P,
689 !IncrementalUpdate) &&
690 preservesResourceLen(
MBB, BlockTrace, InsInstrs, DelInstrs)) {
693 IncrementalUpdate =
true;
694 LastUpdate = BlockIter;
698 RegUnits,
TII,
P, IncrementalUpdate);
707 for (
auto *InstrPtr : InsInstrs)
710 InstrIdxForVirtReg.
clear();
714 if (Changed && IncrementalUpdate)
715 Traces->invalidate(
MBB);
721 TII = STI->getInstrInfo();
722 TRI = STI->getRegisterInfo();
723 SchedModel = STI->getSchedModel();
724 TSchedModel.init(STI);
726 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
727 Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
728 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
729 MBFI = (PSI && PSI->hasProfileSummary()) ?
730 &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
732 TraceEnsemble =
nullptr;
733 RegClassInfo.runOnMachineFunction(MF);
736 if (!
TII->useMachineCombiner()) {
739 <<
" Skipping pass: Target does not support machine combiner\n");
743 bool Changed =
false;
747 Changed |= combineInstructions(&
MBB);
unsigned const MachineRegisterInfo * MRI
COFF::MachineTypes Machine
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
const HexagonInstrInfo * TII
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*--===//
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, MachineTraceMetrics::Ensemble *TraceEnsemble, SparseSet< LiveRegUnit > &RegUnits, const TargetInstrInfo *TII, unsigned Pattern, bool IncrementalUpdate)
Inserts InsInstrs and deletes DelInstrs.
static cl::opt< bool > VerifyPatternOrder("machine-combiner-verify-pattern-order", cl::Hidden, cl::desc("Verify that the generated patterns are ordered by increasing latency"), cl::init(false))
static cl::opt< unsigned > inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500))
static cl::opt< bool > dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, cl::desc("Dump all substituted intrs"), cl::init(false))
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
iterator find(const_arg_type_t< KeyT > Val)
The core instruction combiner logic.
This is an alternative analysis pass to MachineBlockFrequencyInfo.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
int findRegisterUseOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of a machine instruction, given RegUnits.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks={}, ArrayRef< const MCSchedClassDesc * > ExtraInstrs={}, ArrayRef< const MCSchedClassDesc * > RemoveInstrs={}) const
Return the resource length of the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
const_iterator begin() const
const_iterator end() const
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
void initializeMachineCombinerPass(PassRegistry &)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...