#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "aarch64-postlegalizer-combiner"

using namespace MIPatternMatch;

#define GET_GICOMBINER_TYPES
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
// Matches G_EXTRACT_VECTOR_ELT of a pairwise G_FADD so it can later be
// selected to a single FADDP instruction.
bool matchExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  // ... (Cst is the constant extract index; only element 0 qualifies)
  if (!Cst || Cst->Value != 0)
    return false;
  // ... (FAddMI is the G_FADD feeding the extract; DstSize is DstTy's width)
  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
    return false;

  Register Src1Op1 = FAddMI->getOperand(1).getReg();
  Register Src1Op2 = FAddMI->getOperand(2).getReg();
  // ... (one fadd operand must be a shuffle that swaps elements 0 and 1 of
  // the other operand, whose defining instruction is Other)
  std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
  std::get<1>(MatchInfo) = DstTy;
  std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
  return true;
}
void applyExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  unsigned Opc = std::get<0>(MatchInfo);
  assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
  // Extract elements 0 and 1 of the source vector and replace the original
  // op with a scalar add of the two.
  LLT Ty = std::get<1>(MatchInfo);
  Register Src = std::get<2>(MatchInfo);
  LLT s64 = LLT::scalar(64);
  B.setInstrAndDebugLoc(MI);
  auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
  auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
  B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
  MI.eraseFromParent();
}
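
// Illustrative MIR for the pairwise-add combine (a sketch; register names
// and the exact shuffle mask are hypothetical):
//
//   %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %vec, %undef, shufflemask(1, 0)
//   %add:_(<2 x s64>) = G_FADD %vec, %shuf
//   %res:_(s64) = G_EXTRACT_VECTOR_ELT %add, 0
//
// is rewritten into the equivalent scalar form, which selects to FADDP:
//
//   %e0:_(s64) = G_EXTRACT_VECTOR_ELT %vec, 0
//   %e1:_(s64) = G_EXTRACT_VECTOR_ELT %vec, 1
//   %res:_(s64) = G_FADD %e0, %e1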
// Helpers for matchAArch64MulConstCombine below: classify a register by the
// opcode of its defining instruction.
bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
  unsigned Opc = MRI.getVRegDef(R)->getOpcode();
  return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
}

bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
  return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
}
// Rewrites G_MUL by certain constants into shift+add/sub sequences, which are
// cheaper on AArch64 than materializing the constant and multiplying.
bool matchAArch64MulConstCombine(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL);
  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(LHS);
  // ... (bail out unless the RHS is a constant; ConstValue is its value
  // sign-extended to the width of Ty)
  unsigned TrailingZeroes = ConstValue.countr_zero();
  if (TrailingZeroes) {
    // Conservatively do not lower to shift+add+shift if the mul might be
    // folded into a smull or umull.
    if (MRI.hasOneNonDBGUse(LHS) &&
        (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
      return false;
    // Conservatively do not lower to shift+add+shift if the mul might be
    // folded into a madd or msub.
    if (MRI.hasOneNonDBGUse(Dst)) {
      MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
      unsigned UseOpc = UseMI.getOpcode();
      if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
          UseOpc == TargetOpcode::G_SUB)
        return false;
    }
  }
  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
  // and shift+add+shift.
  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);

  unsigned ShiftAmt, AddSubOpc;
  // Is the shifted value the LHS operand of the add/sub?
  bool ShiftValUseIsLHS = true;
  // Do we need to negate the result?
  bool NegateResult = false;

  if (ConstValue.isNonNegative()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
    APInt SCVMinus1 = ShiftedConstValue - 1;
    APInt CVPlus1 = ConstValue + 1;
    if (SCVMinus1.isPowerOf2()) {
      ShiftAmt = SCVMinus1.logBase2();
      AddSubOpc = TargetOpcode::G_ADD;
    } else if (CVPlus1.isPowerOf2()) {
      ShiftAmt = CVPlus1.logBase2();
      AddSubOpc = TargetOpcode::G_SUB;
    } else
      return false;
  } else {
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    APInt CVNegPlus1 = -ConstValue + 1;
    APInt CVNegMinus1 = -ConstValue - 1;
    if (CVNegPlus1.isPowerOf2()) {
      ShiftAmt = CVNegPlus1.logBase2();
      AddSubOpc = TargetOpcode::G_SUB;
      ShiftValUseIsLHS = false;
    } else if (CVNegMinus1.isPowerOf2()) {
      ShiftAmt = CVNegMinus1.logBase2();
      AddSubOpc = TargetOpcode::G_ADD;
      NegateResult = true;
    } else
      return false;
  }

  if (NegateResult && TrailingZeroes)
    return false;

  ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
    auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
    auto ShiftedVal = B.buildShl(Ty, LHS, Shift);

    Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
    Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
    auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
    assert(!(NegateResult && TrailingZeroes) &&
           "NegateResult and TrailingZeroes cannot both be true for now.");
    if (NegateResult) {
      // Negate the result.
      B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
      return;
    }
    if (TrailingZeroes) {
      // Perform the remaining left shift.
      B.buildShl(DstReg, Res,
                 B.buildConstant(LLT::scalar(64), TrailingZeroes));
      return;
    }
    B.buildCopy(DstReg, Res.getReg(0));
  };
  return true;
}
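
// Worked example of the decomposition above (a sketch): multiplying by 6.
// ConstValue = 6 has one trailing zero, so ShiftedConstValue = 3 and
// SCVMinus1 = 2 = 2^1 is a power of two. The combine therefore emits
//   t   = (x << 1) + x   // x * 3
//   res = t << 1         // x * 6
// i.e. two cheap shift/add instructions instead of a MOV+MUL pair.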
void applyAArch64MulConstCombine(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
  B.setInstrAndDebugLoc(MI);
  ApplyFn(B, MI.getOperand(0).getReg());
  MI.eraseFromParent();
}
// G_MERGE_VALUES(Lo, 0) zero-extends Lo, so the merge can be mutated into a
// G_ZEXT in place.
bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Merge = cast<GMerge>(MI);
  // ... (require the high source operand to be a zero constant)
}

void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  Observer.changingInstr(MI);
  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
  // ... (the now-dead zero operand is dropped)
  Observer.changedInstr(MI);
}
// A compare produces 0/1, so a scalar G_ANYEXT of a compare result can be
// mutated into a G_ZEXT, exposing known-zero high bits to later combines.
bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
  // ... (Dst/Src are the def and use operands of the extend)
  return MRI.getType(Dst).isScalar() &&
         mi_match(Src, MRI,
                  m_any_of(m_GICmp(m_Pred(), m_Reg(), m_Reg()),
                           m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
}

void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
                             MachineIRBuilder &B,
                             GISelChangeObserver &Observer) {
  Observer.changingInstr(MI);
  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
  Observer.changedInstr(MI);
}
// Matches a 128-bit store of an all-zeros vector so that it can be split into
// two 64-bit stores of xzr.
bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
  GStore &Store = cast<GStore>(MI);
  if (!Store.isSimple())
    return false;
  // ... (the stored value must be a 128-bit fixed vector, stored in full)
  if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
    return false;
  auto MaybeCst = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(Store.getValueReg()), MRI);
  return MaybeCst && MaybeCst->isZero();
}

void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B,
                            GISelChangeObserver &Observer) {
  GStore &Store = cast<GStore>(MI);
  B.setInstrAndDebugLoc(MI);
  assert(MRI.getType(Store.getValueReg()).isVector() &&
         "Expected a vector store value");
  LLT NewTy = LLT::scalar(64);
  Register PtrReg = Store.getPointerReg();
  auto Zero = B.buildConstant(NewTy, 0);
  auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
                               B.buildConstant(LLT::scalar(64), 8));
  auto &MF = *MI.getMF();
  auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
  auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
  B.buildStore(Zero, PtrReg, *LowMMO);
  B.buildStore(Zero, HighPtr, *HighMMO);
  Store.eraseFromParent();
}
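
// The net effect (a sketch; register names are hypothetical):
//   %zero:_(<2 x s64>) = G_BUILD_VECTOR %c0, %c0
//   G_STORE %zero(<2 x s64>), %ptr :: (store (<2 x s64>))
// becomes two scalar stores, which can select to "stp xzr, xzr, [ptr]":
//   G_STORE %c0(s64), %ptr     :: (store (s64))
//   G_STORE %c0(s64), %highptr :: (store (s64), offset 8)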
// Matches (G_OR (G_AND X, MaskBV), (G_AND Y, InvMaskBV)) where the two
// build-vector masks are element-wise complements; this is a bitwise select.
bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::tuple<Register, Register, Register> &MatchInfo) {
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  // ... (mi_match binds the and operands AO1/AO2 and mask operands BVO1/BVO2)
  auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
  auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
  // ... (every element pair of the two build vectors must be complementary)
  if (!ValAndVReg1 || !ValAndVReg2 ||
      ValAndVReg1->Value != ~ValAndVReg2->Value)
    return false;
  // ...
  MatchInfo = {AO1, AO2, BVO1};
  return true;
}

void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B,
                  std::tuple<Register, Register, Register> &MatchInfo) {
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(
      AArch64::G_BSP, {MI.getOperand(0).getReg()},
      {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
  MI.eraseFromParent();
}
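
// In other words (a sketch): or(and(X, Mask), and(Y, ~Mask)) takes each bit
// from X where Mask is set and from Y where it is clear, so it is emitted as
//   %dst = G_BSP %mask, %x, %y
// which the selector can lower to BSL/BIT/BIF depending on register reuse.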
// Matches mul(and(lshr(X, HalfBits - 1), LowMaskSplat), Splat(1 | 1 << HalfBits)),
// which broadcasts the sign bit of each half-width element; that is a CMLT
// against zero on the half-width type.
bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  // ... (restrict DstTy to the supported fixed vector types)
  auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  if (AndMI->getOpcode() != TargetOpcode::G_AND)
    return false;
  auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
  if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
    return false;

  // Check the constant splat values.
  auto V1 = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
  auto V2 = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
  auto V3 = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
  if (!V1.has_value() || !V2.has_value() || !V3.has_value())
    return false;
  unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
  if (!V1.value().isMask(HalfSize) ||
      V2.value() != (1ULL | 1ULL << HalfSize) || V3 != (HalfSize - 1))
    return false;

  SrcReg = LShrMI->getOperand(1).getReg();
  return true;
}

void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, Register &SrcReg) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT HalfTy =
      DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))
          .changeElementSize(DstTy.getScalarSizeInBits() / 2);

  Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
  Register CastReg =
      B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
  Register CMLTReg =
      B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
          .getReg(0);

  B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg});
  MI.eraseFromParent();
}
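
// Example of the type math above: for DstTy = <2 x s64>, HalfTy is <4 x s32>
// (twice the elements at half the width, same 128 bits in total). The signed
// less-than-zero compare then selects to a single CMLT, and the two bitcasts
// are free register reinterpretations.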
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
  // ...
  const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
  // ...

public:
  AArch64PostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AArch64PostLegalizerCombiner"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};

#define GET_GICOMBINER_IMPL
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_IMPL
AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI, MachineDominatorTree *MDT,
    const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
      RuleConfig(RuleConfig), STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
class AArch64PostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AArch64PostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;

  // Bookkeeping for the consecutive-store optimization below.
  struct StoreInfo {
    GStore *St = nullptr;
    GPtrAdd *Ptr = nullptr; // The G_PTR_ADD computing the store address.
    int64_t Offset = 0;
    LLT StoredType;
  };
  bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
                               CSEMIRBuilder &MIB);
  bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
                                          CSEMIRBuilder &MIB);
};

void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  // ... (also requires/preserves GISelKnownBitsAnalysis, the GISel CSE
  // wrapper, and MachineDominatorTree unless IsOptNone)
}
AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAArch64PostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  const auto *LI = ST.getLegalizerInfo();

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());

  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
                     F.hasMinSize());
  // Disable fixed-point iteration to reduce compile time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // The legalizer performs DCE, so a full DCE pass is unnecessary.
  CInfo.EnableFullDCE = false;
  AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
                                        RuleConfig, ST, MDT, LI);
  bool Changed = Impl.combineMachineInstrs();

  auto MIB = CSEMIRBuilder(MF);
  MIB.setCSEInfo(CSEInfo);
  Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
  return Changed;
}
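
// To inspect what this pass does to a function, an invocation along these
// lines should work on a .mir test (illustrative, not part of this file):
//   llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-combiner \
//       -verify-machineinstrs -o - input.mir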
bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
    SmallVectorImpl<StoreInfo> &Stores, CSEMIRBuilder &MIB) {
  if (Stores.size() <= 2)
    return false;

  // Profitability check: each pair of stores should fold into one STP, so
  // count the instructions the rewritten sequence is expected to need.
  int64_t BaseOffset = Stores[0].Offset;
  unsigned NumPairsExpected = Stores.size() / 2;
  unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
  // Size savings will depend on whether we can fold the offset, as an
  // immediate of an ADD.
  auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
  if (!TLI.isLegalAddImmediate(BaseOffset))
    TotalInstsExpected++;
  int SavingsExpected = Stores.size() - TotalInstsExpected;
  if (SavingsExpected <= 0)
    return false;

  auto &MRI = MIB.getMF().getRegInfo();

  // We have a series of consecutive stores. Factor out the common base
  // pointer and rewrite the offsets.
  Register NewBase = Stores[0].Ptr->getReg(0);
  for (auto &SInfo : Stores) {
    // Compute a new pointer with the new base pointer and adjusted offset.
    MIB.setInstrAndDebugLoc(*SInfo.St);
    auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
    auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
                                  NewBase, NewOff);
    if (MIB.getObserver())
      MIB.getObserver()->changingInstr(*SInfo.St);
    SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
    if (MIB.getObserver())
      MIB.getObserver()->changedInstr(*SInfo.St);
  }
  LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
                    << " stores into a base pointer and offsets.\n");
  return true;
}
static cl::opt<bool> EnableConsecutiveMemOpOpt(
    "aarch64-postlegalizer-consecutive-memops", cl::init(true), cl::Hidden,
    cl::desc("Enable consecutive memop optimization "
             "in AArch64PostLegalizerCombiner"));
bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
    MachineFunction &MF, CSEMIRBuilder &MIB) {
  // ... (early-outs, including a check of EnableConsecutiveMemOpOpt)
  bool Changed = false;
  auto &MRI = MF.getRegInfo();
  SmallVector<StoreInfo, 8> Stores;
  SmallVector<Register> LoadValsSinceLastStore;

  auto storeIsValid = [&](StoreInfo &Last, StoreInfo &New) {
    // Check if we're storing to the same base + some offset.
    if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
        Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
            New.Offset ||
        Last.StoredType != New.StoredType)
      return false;

    // Reject a store whose value comes from a load seen since the last store
    // in the sequence.
    if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
          return New.St->getValueReg() == LoadVal;
        }))
      return false;

    // The offset must stay within range of an immediate-offset STP for the
    // stored type.
    int64_t MaxLegalOffset;
    switch (New.StoredType.getSizeInBits()) {
    case 32:
      MaxLegalOffset = 252;
      break;
    case 64:
      MaxLegalOffset = 504;
      break;
    case 128:
      MaxLegalOffset = 1008;
      break;
    default:
      llvm_unreachable("Unexpected stored type size");
    }
    if (New.Offset < MaxLegalOffset)
      return true;

    // After factoring out the base, offsets are relative to Stores[0].
    return New.Offset - Stores[0].Offset <= MaxLegalOffset;
  };
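
  // The MaxLegalOffset values above mirror STP's signed, scaled 7-bit
  // immediate: the largest positive index is 63 units of the access size,
  // i.e. 63 * 4 = 252 bytes for 32-bit stores, 63 * 8 = 504 for 64-bit,
  // and 63 * 16 = 1008 for 128-bit.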
  auto resetState = [&]() {
    Stores.clear();
    LoadValsSinceLastStore.clear();
  };

  for (auto &MBB : MF) {
    resetState();
    for (auto &MI : MBB) {
      // Skip scalable vectors.
      if (auto *LdSt = dyn_cast<GLoadStore>(&MI);
          LdSt && MRI.getType(LdSt->getOperand(0).getReg()).isScalableVector())
        continue;

      if (auto *St = dyn_cast<GStore>(&MI)) {
        Register PtrBaseReg;
        APInt Offset;
        LLT StoredValTy = MRI.getType(St->getValueReg());
        unsigned ValSize = StoredValTy.getSizeInBits();
        if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
          continue;

        Register PtrReg = St->getPointerReg();
        if (mi_match(PtrReg, MRI,
                     m_OneNonDBGUse(
                         m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
          GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
          StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};

          if (Stores.empty()) {
            Stores.push_back(New);
            continue;
          }

          // Check if this store is a valid continuation of the sequence.
          auto &Last = Stores.back();
          if (storeIsValid(Last, New)) {
            Stores.push_back(New);
            LoadValsSinceLastStore.clear(); // Reset the load value tracking.
          } else {
            // The store isn't a valid continuation of the prior sequence, so
            // optimize what we have so far and start a new sequence.
            Changed |= tryOptimizeConsecStores(Stores, MIB);
            resetState();
            Stores.push_back(New);
          }
        }
      } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
        LoadValsSinceLastStore.push_back(Ld->getDstReg());
      }
    }
    Changed |= tryOptimizeConsecStores(Stores, MIB);
    resetState();
  }

  return Changed;
}
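
// End-to-end effect of the rewrite (a sketch; registers and offsets are
// hypothetical):
//   G_STORE %v0, (G_PTR_ADD %base, 32)
//   G_STORE %v1, (G_PTR_ADD %base, 40)
//   G_STORE %v2, (G_PTR_ADD %base, 48)
//   G_STORE %v3, (G_PTR_ADD %base, 56)
// becomes, with the first store's pointer reused as the new base:
//   G_STORE %v0, (G_PTR_ADD %newbase, 0)
//   ...
//   G_STORE %v3, (G_PTR_ADD %newbase, 24)
// letting the selector emit two STP instructions off one base register.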
char AArch64PostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AArch64 MachineInstrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AArch64 MachineInstrs after legalization",
                    false, false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
  return new AArch64PostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm