Go to the documentation of this file.
36 #define DEBUG_TYPE "machine-combiner"
38 STATISTIC(NumInstCombined,
"Number of machineinst combined");
42 cl::desc(
"Incremental depth computation will be used for basic "
43 "blocks with more instructions."),
cl::init(500));
46 cl::desc(
"Dump all substituted intrs"),
49 #ifdef EXPENSIVE_CHECKS
51 "machine-combiner-verify-pattern-order",
cl::Hidden,
53 "Verify that the generated patterns are ordered by increasing latency"),
57 "machine-combiner-verify-pattern-order",
cl::Hidden,
59 "Verify that the generated patterns are ordered by increasing latency"),
89 StringRef getPassName()
const override {
return "Machine InstCombiner"; }
92 bool doSubstitute(
unsigned NewSize,
unsigned OldSize,
bool OptForSize);
117 std::pair<unsigned, unsigned>
132 "Machine InstCombiner",
false,
false)
138 void MachineCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
139 AU.setPreservesCFG();
156 if (DefInstr && DefInstr->
isPHI())
174 assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
175 "Missing machine model\n");
180 for (
auto *InstrPtr : InsInstrs) {
188 unsigned DepthOp = 0;
189 unsigned LatencyOp = 0;
192 if (II != InstrIdxForVirtReg.
end()) {
194 assert(II->second < InstrDepth.size() &&
"Bad Index");
197 "There must be a definition for a new virtual register");
198 DepthOp = InstrDepth[II->second];
200 int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.
getReg());
201 LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
207 LatencyOp = TSchedModel.computeOperandLatency(
209 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.
getReg()));
212 IDepth =
std::max(IDepth, DepthOp + LatencyOp);
214 InstrDepth.push_back(IDepth);
216 unsigned NewRootIdx = InsInstrs.size() - 1;
217 return InstrDepth[NewRootIdx];
231 assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
232 "Missing machine model\n");
235 unsigned NewRootLatency = 0;
249 unsigned LatencyOp = 0;
251 LatencyOp = TSchedModel.computeOperandLatency(
255 LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
257 NewRootLatency =
std::max(NewRootLatency, LatencyOp);
259 return NewRootLatency;
293 std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
297 assert(!InsInstrs.empty() &&
"Only support sequences that insert instrs.");
298 unsigned NewRootLatency = 0;
301 for (
unsigned i = 0;
i < InsInstrs.size() - 1;
i++)
302 NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[
i]);
305 unsigned RootLatency = 0;
306 for (
auto I : DelInstrs)
307 RootLatency += TSchedModel.computeInstrLatency(
I);
309 return {NewRootLatency, RootLatency};
312 bool MachineCombiner::reduceRegisterPressure(
329 bool MachineCombiner::improvesCriticalPathLen(
336 bool SlackIsAccurate) {
337 assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
338 "Missing machine model\n");
340 unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
343 LLVM_DEBUG(
dbgs() <<
" Dependence data for " << *Root <<
"\tNewRootDepth: "
344 << NewRootDepth <<
"\tRootDepth: " << RootDepth);
354 ?
dbgs() <<
"\t and it does it\n"
355 :
dbgs() <<
"\t but it does NOT do it\n");
356 return NewRootDepth < RootDepth;
364 unsigned NewRootLatency, RootLatency;
365 std::tie(NewRootLatency, RootLatency) =
366 getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
369 unsigned NewCycleCount = NewRootDepth + NewRootLatency;
370 unsigned OldCycleCount =
371 RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
373 <<
"\tRootLatency: " << RootLatency <<
"\n\tRootSlack: "
374 << RootSlack <<
" SlackIsAccurate=" << SlackIsAccurate
375 <<
"\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
376 <<
"\n\tRootDepth + RootLatency + RootSlack = "
379 ?
dbgs() <<
"\n\t It IMPROVES PathLen because"
380 :
dbgs() <<
"\n\t It DOES NOT improve PathLen because");
382 <<
", OldCycleCount = " << OldCycleCount <<
"\n");
384 return NewCycleCount <= OldCycleCount;
388 void MachineCombiner::instr2instrSC(
391 for (
auto *InstrPtr : Instrs) {
392 unsigned Opc = InstrPtr->getOpcode();
393 unsigned Idx =
TII->get(Opc).getSchedClass();
395 InstrsSC.push_back(
SC);
400 bool MachineCombiner::preservesResourceLen(
404 if (!TSchedModel.hasInstrSchedModel())
411 MBBarr.push_back(
MBB);
418 instr2instrSC(InsInstrs, InsInstrsSC);
419 instr2instrSC(DelInstrs, DelInstrsSC);
425 unsigned ResLenAfterCombine =
429 << ResLenBeforeCombine
430 <<
" and after: " << ResLenAfterCombine <<
"\n";);
432 ResLenAfterCombine <=
433 ResLenBeforeCombine +
TII->getExtendResourceLenLimit()
434 ?
dbgs() <<
"\t\t As result it IMPROVES/PRESERVES Resource Length\n"
435 :
dbgs() <<
"\t\t As result it DOES NOT improve/preserve Resource "
438 return ResLenAfterCombine <=
439 ResLenBeforeCombine +
TII->getExtendResourceLenLimit();
444 bool MachineCombiner::doSubstitute(
unsigned NewSize,
unsigned OldSize,
446 if (OptForSize && (NewSize < OldSize))
448 if (!TSchedModel.hasInstrSchedModelOrItineraries())
473 bool IncrementalUpdate) {
483 for (
auto *InstrPtr : InsInstrs)
486 for (
auto *InstrPtr : DelInstrs) {
487 InstrPtr->eraseFromParent();
489 for (
auto I = RegUnits.
begin();
I != RegUnits.
end(); ) {
490 if (
I->MI == InstrPtr)
497 if (IncrementalUpdate)
498 for (
auto *InstrPtr : InsInstrs)
508 void MachineCombiner::verifyPatternOrder(
512 (void)PrevLatencyDiff;
513 for (
auto P : Patterns) {
517 TII->genAlternativeCodeSequence(Root,
P, InsInstrs, DelInstrs,
522 if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
525 unsigned NewRootLatency, RootLatency;
526 std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
527 Root, InsInstrs, DelInstrs, MinInstr->getTrace(
MBB));
528 long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
529 assert(CurrentLatencyDiff <= PrevLatencyDiff &&
530 "Current pattern is better than previous pattern.");
531 PrevLatencyDiff = CurrentLatencyDiff;
543 bool Changed =
false;
546 bool IncrementalUpdate =
false;
548 decltype(BlockIter) LastUpdate;
559 bool DoRegPressureReduce =
560 TII->shouldReduceRegisterPressure(
MBB, &RegClassInfo);
562 while (BlockIter !=
MBB->
end()) {
563 auto &
MI = *BlockIter++;
592 if (!
TII->getMachineCombinerPatterns(
MI, Patterns, DoRegPressureReduce))
596 verifyPatternOrder(
MBB,
MI, Patterns);
598 for (
auto P : Patterns) {
602 TII->genAlternativeCodeSequence(
MI,
P, InsInstrs, DelInstrs,
604 unsigned NewInstCount = InsInstrs.size();
605 unsigned OldInstCount = DelInstrs.size();
613 dbgs() <<
"\tFor the Pattern (" << (
int)
P
614 <<
") these instructions could be removed\n";
615 for (
auto const *InstrPtr : DelInstrs)
616 InstrPtr->print(
dbgs(),
false,
false,
618 dbgs() <<
"\tThese instructions could replace the removed ones\n";
619 for (
auto const *InstrPtr : InsInstrs)
620 InstrPtr->print(
dbgs(),
false,
false,
624 bool SubstituteAlways =
false;
625 if (ML &&
TII->isThroughputPattern(
P))
626 SubstituteAlways =
true;
628 if (IncrementalUpdate && LastUpdate != BlockIter) {
630 MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
631 LastUpdate = BlockIter;
634 if (DoRegPressureReduce &&
639 IncrementalUpdate =
true;
640 LastUpdate = BlockIter;
642 if (reduceRegisterPressure(
MI,
MBB, InsInstrs, DelInstrs,
P)) {
645 RegUnits,
TII,
P, IncrementalUpdate);
659 if (SubstituteAlways ||
660 doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
662 RegUnits,
TII,
P, IncrementalUpdate);
673 Traces->verifyAnalysis();
674 if (improvesCriticalPathLen(
MBB, &
MI, BlockTrace, InsInstrs, DelInstrs,
675 InstrIdxForVirtReg,
P,
676 !IncrementalUpdate) &&
677 preservesResourceLen(
MBB, BlockTrace, InsInstrs, DelInstrs)) {
680 IncrementalUpdate =
true;
681 LastUpdate = BlockIter;
685 RegUnits,
TII,
P, IncrementalUpdate);
694 for (
auto *InstrPtr : InsInstrs)
697 InstrIdxForVirtReg.
clear();
701 if (Changed && IncrementalUpdate)
702 Traces->invalidate(
MBB);
708 TII = STI->getInstrInfo();
709 TRI = STI->getRegisterInfo();
710 SchedModel = STI->getSchedModel();
711 TSchedModel.init(STI);
713 MLI = &getAnalysis<MachineLoopInfo>();
714 Traces = &getAnalysis<MachineTraceMetrics>();
715 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
716 MBFI = (PSI && PSI->hasProfileSummary()) ?
717 &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
721 RegClassInfo.runOnMachineFunction(MF);
724 if (!
TII->useMachineCombiner()) {
727 <<
" Skipping pass: Target does not support machine combiner\n");
731 bool Changed =
false;
735 Changed |= combineInstructions(&
MBB);
This is an optimization pass for GlobalISel generic memory operations.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const_iterator begin() const
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVector< MachineInstr *, 16 > InsInstrs, SmallVector< MachineInstr *, 16 > DelInstrs, MachineTraceMetrics::Ensemble *MinInstr, SparseSet< LiveRegUnit > &RegUnits, const TargetInstrInfo *TII, MachineCombinerPattern Pattern, bool IncrementalUpdate)
Inserts InsInstrs and deletes DelInstrs.
@ MustReduceRegisterPressure
static reg_iterator reg_end()
unsigned getNumRegUnits() const
Return the number of (native) register units in the target.
unsigned const TargetRegisterInfo * TRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
TargetInstrInfo - Interface to description of machine instruction set.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
static cl::opt< bool > VerifyPatternOrder("machine-combiner-verify-pattern-order", cl::Hidden, cl::desc("Verify that the generated patterns are ordered by increasing latency"), cl::init(false))
const_iterator end() const
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Summarize the scheduling resources required for an instruction of a particular scheduling class.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
Represent the analysis usage information of a pass.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
const HexagonInstrInfo * TII
MachineOperand class - Representation of each machine instruction operand.
STATISTIC(NumFunctions, "Total number of functions")
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Analysis providing profile information.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*–===//
static cl::opt< unsigned > inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500))
Provide an instruction scheduling machine model to CodeGen passes.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
initializer< Ty > init(const Ty &Val)
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
iterator find(const_arg_type_t< KeyT > Val)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void initializeMachineCombinerPass(PassRegistry &)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=None, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=None, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=None) const
Return the resource length of the trace.
Register getReg() const
getReg - Returns the register number.
This is an alternative analysis pass to MachineBlockFrequencyInfo.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
StringRef - Represent a constant reference to a string, i.e.
TargetSubtargetInfo - Generic base class for all target subtargets.
unsigned const MachineRegisterInfo * MRI
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
static CombinerObjective getCombinerObjective(MachineCombinerPattern P)
INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) INITIALIZE_PASS_END(MachineCombiner
reg_iterator reg_begin(Register RegNo) const
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
Function & getFunction()
Return the LLVM function that this machine code represents.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Machine model for scheduling, bundling, and heuristics.
The core instruction combiner logic.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
static cl::opt< bool > dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, cl::desc("Dump all substituted intrs"), cl::init(false))
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
COFF::MachineTypes Machine
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
Align max(MaybeAlign Lhs, Align Rhs)
char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
iterator_range< mop_iterator > operands()
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineCombinerPattern
These are instruction patterns matched by the machine combiner pass.