54using namespace PatternMatch;
56#define DEBUG_TYPE "loop-idiom-vectorize"
60 cl::desc(
"Disable Loop Idiom Vectorize Pass."));
65 cl::desc(
"Proceed with Loop Idiom Vectorize Pass, but do "
66 "not convert byte-compare loop(s)."));
70 cl::desc(
"Verify loops generated Loop Idiom Vectorize Pass."));
74class LoopIdiomVectorize {
75 Loop *CurLoop =
nullptr;
93 bool runOnCountableLoop();
97 bool recognizeByteCompare();
115 const auto *
DL = &L.getHeader()->getDataLayout();
117 LoopIdiomVectorize LIT(&AR.
DT, &AR.
LI, &AR.
TTI,
DL);
130bool LoopIdiomVectorize::run(
Loop *L) {
133 Function &
F = *L->getHeader()->getParent();
137 if (
F.hasFnAttribute(Attribute::NoImplicitFloat)) {
139 <<
" due to its NoImplicitFloat attribute");
145 if (!L->getLoopPreheader())
149 << CurLoop->getHeader()->getName() <<
"\n");
151 return recognizeByteCompare();
154bool LoopIdiomVectorize::recognizeByteCompare() {
169 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 2)
172 PHINode *PN = dyn_cast<PHINode>(&Header->front());
176 auto LoopBlocks = CurLoop->getBlocks();
185 if (LoopBlocks[0]->sizeWithoutDebug() > 4)
199 if (LoopBlocks[1]->sizeWithoutDebug() > 7)
203 Value *StartIdx =
nullptr;
214 if (!
Index || !
Index->getType()->isIntegerTy(32) ||
224 for (
User *U :
I.users())
225 if (!CurLoop->contains(cast<Instruction>(U)))
232 if (!
match(Header->getTerminator(),
243 Value *LoadA, *LoadB;
254 LoadInst *LoadAI = cast<LoadInst>(LoadA);
255 LoadInst *LoadBI = cast<LoadInst>(LoadB);
269 if (!CurLoop->isLoopInvariant(PtrA) || !CurLoop->isLoopInvariant(PtrB) ||
308 if (FoundBB == EndBB) {
310 Value *WhileCondVal = EndPN.getIncomingValueForBlock(Header);
311 Value *WhileBodyVal = EndPN.getIncomingValueForBlock(WhileBB);
317 if (WhileCondVal != WhileBodyVal &&
318 ((WhileCondVal !=
Index && WhileCondVal != MaxLen) ||
319 (WhileBodyVal !=
Index)))
329 transformByteCompare(GEPA, GEPB, PN, MaxLen,
Index, StartIdx,
true,
334Value *LoopIdiomVectorize::expandFindMismatch(
341 BasicBlock *Preheader = CurLoop->getLoopPreheader();
349 SplitBlock(Preheader, PHBranch, DT, LI,
nullptr,
"mismatch_end");
365 Ctx,
"mismatch_min_it_check", EndBlock->
getParent(), EndBlock);
371 Ctx,
"mismatch_mem_check", EndBlock->
getParent(), EndBlock);
374 Ctx,
"mismatch_vec_loop_preheader", EndBlock->
getParent(), EndBlock);
377 Ctx,
"mismatch_vec_loop", EndBlock->
getParent(), EndBlock);
380 Ctx,
"mismatch_vec_loop_inc", EndBlock->
getParent(), EndBlock);
383 Ctx,
"mismatch_vec_loop_found", EndBlock->
getParent(), EndBlock);
386 Ctx,
"mismatch_loop_pre", EndBlock->
getParent(), EndBlock);
392 Ctx,
"mismatch_loop_inc", EndBlock->
getParent(), EndBlock);
398 auto VectorLoop = LI->AllocateLoop();
399 auto ScalarLoop = LI->AllocateLoop();
401 if (CurLoop->getParentLoop()) {
402 CurLoop->getParentLoop()->addBasicBlockToLoop(MinItCheckBlock, *LI);
403 CurLoop->getParentLoop()->addBasicBlockToLoop(MemCheckBlock, *LI);
404 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopPreheaderBlock,
406 CurLoop->getParentLoop()->addChildLoop(VectorLoop);
407 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopMismatchBlock, *LI);
408 CurLoop->getParentLoop()->addBasicBlockToLoop(LoopPreHeaderBlock, *LI);
409 CurLoop->getParentLoop()->addChildLoop(ScalarLoop);
411 LI->addTopLevelLoop(VectorLoop);
412 LI->addTopLevelLoop(ScalarLoop);
416 VectorLoop->addBasicBlockToLoop(VectorLoopStartBlock, *LI);
417 VectorLoop->addBasicBlockToLoop(VectorLoopIncBlock, *LI);
419 ScalarLoop->addBasicBlockToLoop(LoopStartBlock, *LI);
420 ScalarLoop->addBasicBlockToLoop(LoopIncBlock, *LI);
435 LLVMContext::MD_prof,
437 Builder.
Insert(MinItCheckBr);
477 Value *CombinedPageCmp = Builder.
CreateOr(LhsPageCmp, RhsPageCmp);
479 LoopPreHeaderBlock, VectorLoopPreheaderBlock, CombinedPageCmp);
483 Builder.
Insert(CombinedPageCmpCmpBr);
503 Intrinsic::get_active_lane_mask, {PredVTy, I64Type}, {ExtStart, ExtEnd});
506 VecLen = Builder.
CreateMul(VecLen, ConstantInt::get(I64Type, 16),
"",
513 Builder.
Insert(JumpToVectorLoop);
516 VectorLoopStartBlock}});
521 PHINode *LoopPred = Builder.
CreatePHI(PredVTy, 2,
"mismatch_vec_loop_pred");
522 LoopPred->
addIncoming(InitialPred, VectorLoopPreheaderBlock);
523 PHINode *VectorIndexPhi = Builder.
CreatePHI(I64Type, 2,
"mismatch_vec_index");
524 VectorIndexPhi->
addIncoming(ExtStart, VectorLoopPreheaderBlock);
528 Value *VectorLhsGep =
531 Align(1), LoopPred, Passthru);
533 Value *VectorRhsGep =
536 Align(1), LoopPred, Passthru);
539 VectorMatchCmp = Builder.
CreateSelect(LoopPred, VectorMatchCmp, PFalse);
542 VectorLoopMismatchBlock, VectorLoopIncBlock, VectorMatchHasActiveLanes);
543 Builder.
Insert(VectorEarlyExit);
553 Value *NewVectorIndexPhi =
554 Builder.
CreateAdd(VectorIndexPhi, VecLen,
"",
556 VectorIndexPhi->
addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);
559 {PredVTy, I64Type}, {NewVectorIndexPhi, ExtEnd});
560 LoopPred->
addIncoming(NewPred, VectorLoopIncBlock);
562 Value *PredHasActiveLanes =
566 Builder.
Insert(VectorLoopBranchBack);
575 PHINode *FoundPred = Builder.
CreatePHI(PredVTy, 1,
"mismatch_vec_found_pred");
576 FoundPred->
addIncoming(VectorMatchCmp, VectorLoopStartBlock);
578 Builder.
CreatePHI(PredVTy, 1,
"mismatch_vec_last_loop_pred");
579 LastLoopPred->
addIncoming(LoopPred, VectorLoopStartBlock);
581 Builder.
CreatePHI(I64Type, 1,
"mismatch_vec_found_index");
582 VectorFoundIndex->
addIncoming(VectorIndexPhi, VectorLoopStartBlock);
586 Intrinsic::experimental_cttz_elts, {ResType, PredMatchCmp->
getType()},
587 {PredMatchCmp, Builder.
getInt1(
true)});
589 Value *VectorLoopRes64 = Builder.
CreateAdd(VectorFoundIndex, Ctz,
"",
624 Builder.
Insert(MatchCmpBr);
631 Value *PhiInc = Builder.
CreateAdd(IndexPhi, ConstantInt::get(ResType, 1),
"",
632 Index->hasNoUnsignedWrap(),
633 Index->hasNoSignedWrap());
654 ResPhi->
addIncoming(VectorLoopRes, VectorLoopMismatchBlock);
659 ScalarLoop->verifyLoop();
660 VectorLoop->verifyLoop();
661 if (!VectorLoop->isRecursivelyLCSSAForm(*DT, *LI))
663 if (!ScalarLoop->isRecursivelyLCSSAForm(*DT, *LI))
678 BasicBlock *Preheader = CurLoop->getLoopPreheader();
687 Start = Builder.
CreateAdd(Start, ConstantInt::get(Start->getType(), 1));
690 expandFindMismatch(Builder, DTU, GEPA, GEPB,
Index, Start, MaxLen);
694 assert(IndPhi->
hasOneUse() &&
"Index phi node has more than one use!");
695 Index->replaceAllUsesWith(ByteCmpRes);
698 "Expected preheader to terminate with an unconditional branch.");
704 CmpBB->moveBefore(EndBB);
717 if (FoundBB != EndBB) {
728 auto fixSuccessorPhis = [&](
BasicBlock *SuccBB) {
729 for (
PHINode &PN : SuccBB->phis()) {
735 if (
Op == ByteCmpRes) {
752 if (CurLoop->contains(BB)) {
761 fixSuccessorPhis(EndBB);
762 if (EndBB != FoundBB)
763 fixSuccessorPhis(FoundBB);
767 if (!CurLoop->isOutermost())
768 CurLoop->getParentLoop()->addBasicBlockToLoop(CmpBB, *LI);
771 CurLoop->getParentLoop()->verifyLoop();
772 if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< bool > VerifyLoops("loop-idiom-vectorize-verify", cl::Hidden, cl::init(false), cl::desc("Verify loops generated Loop Idiom Vectorize Pass."))
static cl::opt< bool > DisableAll("disable-loop-idiom-vectorize-all", cl::Hidden, cl::init(false), cl::desc("Disable Loop Idiom Vectorize Pass."))
static cl::opt< bool > DisableByteCmp("disable-loop-idiom-vectorize-bytecmp", cl::Hidden, cl::init(false), cl::desc("Proceed with Loop Idiom Vectorize Pass, but do " "not convert byte-compare loop(s)."))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A container for analyses that lazily runs them and caches their results.
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
bool isUnconditional() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Value * getPointerOperand()
Type * getResultElementType() const
unsigned getNumIndices() const
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
ConstantInt * getTrue()
Get the constant value for i1 true.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Represents a single loop in the control flow graph.
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class represents an analyzed expression in the program.
Class to represent scalable SIMD vectors.
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI