41#include "llvm/IR/IntrinsicsHexagon.h"
76#define DEBUG_TYPE "hexagon-lir"
82 cl::desc(
"Disable generation of memcpy in loop idiom recognition"));
86 cl::desc(
"Disable generation of memmove in loop idiom recognition"));
90 "check guarding the memmove."));
94 cl::desc(
"Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
99 cl::desc(
"Only enable generating memmove in non-nested loops"));
103 cl::desc(
"Enable Hexagon-specific memcpy for volatile destination."));
110class HexagonLoopIdiomRecognize {
115 :
AA(
AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
120 int getSCEVStride(
const SCEVAddRecExpr *StoreEv);
121 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
122 void collectStores(Loop *CurLoop, BasicBlock *BB,
123 SmallVectorImpl<StoreInst *> &Stores);
124 bool processCopyingStore(Loop *CurLoop, StoreInst *SI,
const SCEV *BECount);
125 bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts)
const;
126 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
const SCEV *BECount,
127 SmallVectorImpl<BasicBlock *> &ExitBlocks);
128 bool runOnCountableLoop(Loop *L);
131 const DataLayout *DL;
134 const TargetLibraryInfo *TLI;
136 bool HasMemcpy, HasMemmove;
139class HexagonLoopIdiomRecognizeLegacyPass :
public LoopPass {
143 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {}
145 StringRef getPassName()
const override {
146 return "Recognize Hexagon-specific loop idioms";
149 void getAnalysisUsage(AnalysisUsage &AU)
const override {
160 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
166 Rule(StringRef
N, FuncType
F) : Name(
N), Fn(
F) {}
171 void addRule(StringRef
N,
const Rule::FuncType &
F) {
172 Rules.push_back(Rule(
N,
F));
176 struct WorkListType {
177 WorkListType() =
default;
179 void push_back(
Value *V) {
181 if (S.insert(V).second)
185 Value *pop_front_val() {
192 bool empty()
const {
return Q.empty(); }
195 std::deque<Value *> Q;
199 using ValueSetType = std::set<Value *>;
201 std::vector<Rule> Rules;
205 using ValueMapType = DenseMap<Value *, Value *>;
219 void print(raw_ostream &OS,
const Value *V)
const;
223 friend struct Simplifier;
228 template <
typename FuncT>
void traverse(
Value *V, FuncT
F);
229 void record(
Value *V);
231 void unuse(
Value *V);
233 bool equal(
const Instruction *
I,
const Instruction *J)
const;
244 PE(
const Simplifier::Context &c,
Value *v =
nullptr) : C(c), V(
v) {}
246 const Simplifier::Context &C;
252 P.C.print(OS,
P.V ?
P.V :
P.C.Root);
258char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
261 "Recognize Hexagon-specific loop idioms",
false,
false)
272template <typename FuncT>
273void Simplifier::Context::traverse(
Value *V, FuncT
F) {
278 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
279 if (!U || U->getParent())
283 for (Value *Op : U->operands())
291 OS <<
V <<
'(' << *
V <<
')';
295 if (
U->getParent()) {
297 U->printAsOperand(OS,
true);
302 unsigned N =
U->getNumOperands();
305 OS <<
U->getOpcodeName();
306 for (
const Value *
Op :
U->operands()) {
314void Simplifier::Context::initialize(Instruction *Exp) {
324 Value *
V = Q.pop_front_val();
332 M.insert({
U,
U->clone()});
336 for (std::pair<Value*,Value*>
P : M) {
338 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
339 auto F =
M.find(
U->getOperand(i));
341 U->setOperand(i,
F->second);
345 auto R =
M.find(Exp);
353void Simplifier::Context::record(
Value *V) {
361void Simplifier::Context::use(
Value *V) {
369void Simplifier::Context::unuse(
Value *V) {
393 if (!U ||
U->getParent())
395 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
398 U->setOperand(i, NewV);
408void Simplifier::Context::replace(
Value *OldV,
Value *NewV) {
424 Value *
V = Q.pop_front_val();
426 if (!U ||
U->getParent())
430 NewV = subst(NewV, V, DupV);
438 Root = subst(Root, OldV, NewV);
442void Simplifier::Context::cleanup() {
443 for (
Value *V : Clones) {
446 U->dropAllReferences();
449 for (
Value *V : Clones) {
456bool Simplifier::Context::equal(
const Instruction *
I,
457 const Instruction *J)
const {
460 if (!
I->isSameOperationAs(J))
463 return I->isIdenticalTo(J);
465 for (
unsigned i = 0, n =
I->getNumOperands(); i != n; ++i) {
472 if (!
equal(InI, InJ))
474 }
else if (InI != InJ || !InI)
486 Value *
V = Q.pop_front_val();
490 if (!U ||
U->getParent())
492 if (SubI &&
equal(SubI, U))
501void Simplifier::Context::link(Instruction *
I, BasicBlock *
B,
511 I->insertInto(
B, At);
514Value *Simplifier::Context::materialize(BasicBlock *
B,
528 if (
Count++ >= Limit)
531 if (!U ||
U->getParent() || !
C.Used.count(U))
534 for (Rule &R : Rules) {
549 return Count < Limit ?
C.Root :
nullptr;
560 class PolynomialMultiplyRecognize {
562 explicit PolynomialMultiplyRecognize(Loop *loop,
const DataLayout &dl,
563 const DominatorTree &dt,
const TargetLibraryInfo &tli,
565 : CurLoop(loop),
DL(dl), DT(dt), TLI(tli), SE(se) {}
570 using ValueSeq = SetVector<Value *>;
572 IntegerType *getPmpyType()
const {
573 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
577 bool isPromotableTo(
Value *V, IntegerType *Ty);
578 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
579 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
581 Value *getCountIV(BasicBlock *BB);
583 void classifyCycle(Instruction *DivI, ValueSeq &
Cycle, ValueSeq &Early,
585 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
586 bool commutesWithShift(Instruction *
I);
587 bool highBitsAreZero(
Value *V,
unsigned IterCount);
588 bool keepsHighBitsZero(
Value *V,
unsigned IterCount);
589 bool isOperandShifted(Instruction *
I,
Value *
Op);
590 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
592 void cleanupLoopBody(BasicBlock *LoopB);
594 struct ParsedValues {
595 ParsedValues() =
default;
603 unsigned IterCount = 0;
608 bool matchLeftShift(SelectInst *SelI,
Value *CIV, ParsedValues &PV);
609 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
610 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
611 Value *CIV, ParsedValues &PV,
bool PreScan);
612 unsigned getInverseMxN(
unsigned QP);
615 void setupPreSimplifier(Simplifier &S);
616 void setupPostSimplifier(Simplifier &S);
619 const DataLayout &
DL;
620 const DominatorTree &DT;
621 const TargetLibraryInfo &TLI;
627Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
629 if (std::distance(PI, PE) != 2)
635 Value *InitV = PN->getIncomingValueForBlock(
PB);
638 Value *IterV = PN->getIncomingValueForBlock(BB);
642 if (BO->getOpcode() != Instruction::Add)
644 Value *IncV =
nullptr;
645 if (BO->getOperand(0) == PN)
646 IncV = BO->getOperand(1);
647 else if (BO->getOperand(1) == PN)
648 IncV = BO->getOperand(0);
660 for (
auto UI =
I->user_begin(), UE =
I->user_end(); UI != UE;) {
661 Use &TheUse = UI.getUse();
664 if (BB ==
II->getParent())
665 II->replaceUsesOfWith(
I, J);
669bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
670 Value *CIV, ParsedValues &PV) {
682 using namespace PatternMatch;
685 Value *
A =
nullptr, *
B =
nullptr, *
C =
nullptr;
695 Value *
X =
nullptr, *Sh1 =
nullptr;
723 Value *ShouldSameV =
nullptr, *ShouldXoredV =
nullptr;
726 ShouldXoredV = FalseV;
728 ShouldSameV = FalseV;
729 ShouldXoredV = TrueV;
732 Value *Q =
nullptr, *
R =
nullptr, *
Y =
nullptr, *
Z =
nullptr;
738 if (ShouldSameV ==
Y)
740 else if (ShouldSameV == Z)
783bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
796 using namespace PatternMatch;
823 Value *
R =
nullptr, *Q =
nullptr;
853bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
854 BasicBlock *LoopB, BasicBlock *PrehB,
Value *CIV, ParsedValues &PV,
856 using namespace PatternMatch;
895 if (matchLeftShift(SelI, CIV, PV)) {
904 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
910 if (CurLoop->isLoopInvariant(PV.X)) {
920 Value *Var =
nullptr, *Inv =
nullptr, *X1 =
nullptr, *X2 =
nullptr;
925 if (!I1 ||
I1->getParent() != LoopB) {
928 }
else if (!I2 || I2->getParent() != LoopB) {
939 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
946 if (matchRightShift(SelI, PV)) {
960bool PolynomialMultiplyRecognize::isPromotableTo(
Value *Val,
961 IntegerType *DestTy) {
978 switch (
In->getOpcode()) {
979 case Instruction::PHI:
980 case Instruction::ZExt:
981 case Instruction::And:
982 case Instruction::Or:
983 case Instruction::Xor:
984 case Instruction::LShr:
985 case Instruction::Select:
986 case Instruction::Trunc:
988 case Instruction::ICmp:
990 return CI->isEquality() || CI->isUnsigned();
992 case Instruction::Add:
993 return In->hasNoSignedWrap() &&
In->hasNoUnsignedWrap();
998void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
999 IntegerType *DestTy, BasicBlock *LoopB) {
1000 Type *OrigTy =
In->getType();
1004 if (!
In->getType()->isIntegerTy(1))
1005 In->mutateType(DestTy);
1010 unsigned N =
P->getNumIncomingValues();
1011 for (
unsigned i = 0; i !=
N; ++i) {
1015 Value *InV =
P->getIncomingValue(i);
1018 if (Ty !=
P->getType()) {
1023 P->setIncomingValue(i, InV);
1028 if (
Op->getType() ==
Z->getType())
1029 Z->replaceAllUsesWith(
Op);
1030 Z->eraseFromParent();
1037 T->replaceAllUsesWith(
And);
1038 T->eraseFromParent();
1043 for (
unsigned i = 0, n =
In->getNumOperands(); i != n; ++i) {
1045 if (CI->getBitWidth() < DestBW)
1046 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1050bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1051 BasicBlock *ExitB) {
1059 IntegerType *DestTy = getPmpyType();
1063 for (PHINode &
P : ExitB->
phis()) {
1064 if (
P.getNumIncomingValues() != 1)
1066 assert(
P.getIncomingBlock(0) == LoopB);
1068 if (!
T ||
T->getBitWidth() > DestBW)
1073 for (Instruction &In : *LoopB)
1074 if (!
In.isTerminator() && !isPromotableTo(&In, DestTy))
1079 for (Instruction *In : LoopIns)
1080 if (!
In->isTerminator())
1081 promoteTo(In, DestTy, LoopB);
1085 for (
auto I = ExitB->
begin();
I != End; ++
I) {
1089 Type *Ty0 =
P->getIncomingValue(0)->getType();
1090 Type *PTy =
P->getType();
1098 P->replaceAllUsesWith(
T);
1108bool PolynomialMultiplyRecognize::findCycle(
Value *Out,
Value *In,
1115 bool HadPhi =
false;
1117 for (
auto *U : Out->
users()) {
1119 if (
I ==
nullptr ||
I->getParent() != BB)
1127 if (IsPhi && HadPhi)
1132 if (findCycle(
I, In,
Cycle))
1136 return !
Cycle.empty();
1139void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1140 ValueSeq &
Cycle, ValueSeq &Early, ValueSeq &Late) {
1147 for (
I = 0;
I <
N; ++
I) {
1158 ValueSeq &
First = !IsE ? Early : Late;
1159 for (
unsigned J = 0; J <
I; ++J)
1162 ValueSeq &Second = IsE ? Early : Late;
1164 for (++
I;
I <
N; ++
I) {
1175bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1176 ValueSeq &Early, ValueSeq &Late) {
1180 if (UseI->
getOpcode() == Instruction::Select) {
1182 if (Early.count(TV) || Early.count(FV)) {
1183 if (Late.count(TV) || Late.count(FV))
1186 }
else if (Late.count(TV) || Late.count(FV)) {
1187 if (Early.count(TV) || Early.count(FV))
1199 bool AE =
true,
AL =
true;
1201 if (Early.count(&*
I))
1203 else if (Late.count(&*
I))
1227bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *
I) {
1228 switch (
I->getOpcode()) {
1229 case Instruction::And:
1230 case Instruction::Or:
1231 case Instruction::Xor:
1232 case Instruction::LShr:
1233 case Instruction::Shl:
1234 case Instruction::Select:
1235 case Instruction::ICmp:
1236 case Instruction::PHI:
1244bool PolynomialMultiplyRecognize::highBitsAreZero(
Value *V,
1245 unsigned IterCount) {
1250 KnownBits Known(
T->getBitWidth());
1252 return Known.countMinLeadingZeros() >= IterCount;
1255bool PolynomialMultiplyRecognize::keepsHighBitsZero(
Value *V,
1256 unsigned IterCount) {
1260 return C->getValue().countl_zero() >= IterCount;
1263 switch (
I->getOpcode()) {
1264 case Instruction::And:
1265 case Instruction::Or:
1266 case Instruction::Xor:
1267 case Instruction::LShr:
1268 case Instruction::Select:
1269 case Instruction::ICmp:
1270 case Instruction::PHI:
1271 case Instruction::ZExt:
1279bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *
I,
Value *
Op) {
1280 unsigned Opc =
I->getOpcode();
1281 if (
Opc == Instruction::Shl ||
Opc == Instruction::LShr)
1282 return Op !=
I->getOperand(1);
1286bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1287 BasicBlock *ExitB,
unsigned IterCount) {
1288 Value *CIV = getCountIV(LoopB);
1292 if (CIVTy ==
nullptr)
1296 ValueSeq Early, Late, Cycled;
1299 for (Instruction &
I : *LoopB) {
1300 using namespace PatternMatch;
1306 if (!findCycle(&
I, V,
C))
1311 classifyCycle(&
I,
C, Early, Late);
1312 Cycled.insert_range(
C);
1319 for (
unsigned i = 0; i <
Users.size(); ++i) {
1326 if (!commutesWithShift(R))
1328 for (User *U :
R->users()) {
1337 if (!classifyInst(
T, Early, Late))
1348 for (
unsigned i = 0; i <
Internal.size(); ++i) {
1354 if (
T &&
T->getParent() != LoopB)
1360 for (
Value *V : Inputs)
1361 if (!highBitsAreZero(V, IterCount))
1364 if (!keepsHighBitsZero(V, IterCount))
1369 std::map<Value*,Value*> ShiftMap;
1371 using CastMapType = std::map<std::pair<Value *, Type *>,
Value *>;
1373 CastMapType CastMap;
1376 IntegerType *Ty) ->
Value * {
1377 auto [
H,
Inserted] = CM.try_emplace(std::make_pair(V, Ty));
1379 H->second = IRB.CreateIntCast(V, Ty,
false);
1383 for (
auto I = LoopB->begin(),
E = LoopB->end();
I !=
E; ++
I) {
1384 using namespace PatternMatch;
1397 for (
auto &J :
I->operands()) {
1399 if (!isOperandShifted(&*
I,
Op))
1407 auto F = ShiftMap.find(
Op);
1408 Value *
W = (
F != ShiftMap.end()) ?
F->second :
nullptr;
1410 IRB.SetInsertPoint(&*
I);
1414 Value *ShAmt = CIV, *ShVal =
Op;
1417 if (Late.count(&*
I))
1418 ShVal = IRB.CreateShl(
Op, ConstantInt::get(VTy, 1));
1422 if (VTy->getBitWidth() < ATy->getBitWidth())
1423 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1425 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1428 W = IRB.CreateShl(ShVal, ShAmt);
1429 ShiftMap.insert(std::make_pair(
Op, W));
1431 I->replaceUsesOfWith(
Op, W);
1441 for (
auto P = ExitB->
begin(), Q = ExitB->
end();
P != Q; ++
P) {
1445 Value *
U = PN->getIncomingValueForBlock(LoopB);
1446 if (!
Users.count(U))
1448 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1449 PN->replaceAllUsesWith(S);
1460void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1461 for (
auto &
I : *LoopB)
1463 I.replaceAllUsesWith(SV);
1469unsigned PolynomialMultiplyRecognize::getInverseMxN(
unsigned QP) {
1472 std::array<char,32> Q,
C;
1474 for (
unsigned i = 0; i < 32; ++i) {
1491 for (
unsigned i = 1; i < 32; ++i) {
1499 for (
unsigned j = 0;
j < i; ++
j)
1500 T =
T ^ (
C[j] & Q[i-j]);
1505 for (
unsigned i = 0; i < 32; ++i)
1515 Module *
M = At->getParent()->getParent()->getParent();
1520 unsigned IC = PV.IterCount;
1522 if (PV.M !=
nullptr)
1523 P0 =
P =
B.CreateXor(
P, PV.M);
1528 if (PV.IterCount != 32)
1529 P =
B.CreateAnd(
P, BMI);
1533 assert(QI && QI->getBitWidth() <= 32);
1536 unsigned M = (1 << PV.IterCount) - 1;
1537 unsigned Tmp = (QI->getZExtValue() | 1) &
M;
1538 unsigned QV = getInverseMxN(Tmp) &
M;
1539 auto *QVI = ConstantInt::get(QI->getType(), QV);
1540 P =
B.CreateCall(PMF, {
P, QVI});
1541 P =
B.CreateTrunc(
P, QI->getType());
1543 P =
B.CreateAnd(
P, BMI);
1546 Value *
R =
B.CreateCall(PMF, {
P, Q});
1548 if (PV.M !=
nullptr)
1549 R =
B.CreateXor(R,
B.CreateIntCast(P0,
R->getType(),
false));
1556 return CI->getValue().isNonNegative();
1560 switch (
I->getOpcode()) {
1561 case Instruction::LShr:
1563 return SI->getZExtValue() > 0;
1565 case Instruction::Or:
1566 case Instruction::Xor:
1569 case Instruction::And:
1576void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1577 S.addRule(
"sink-zext",
1579 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1580 if (
I->getOpcode() != Instruction::ZExt)
1585 switch (
T->getOpcode()) {
1586 case Instruction::And:
1587 case Instruction::Or:
1588 case Instruction::Xor:
1595 B.CreateZExt(
T->getOperand(0),
I->getType()),
1596 B.CreateZExt(
T->getOperand(1),
I->getType()));
1598 S.addRule(
"xor/and -> and/xor",
1600 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1601 if (
I->getOpcode() != Instruction::Xor)
1607 if (And0->
getOpcode() != Instruction::And ||
1616 S.addRule(
"sink binop into select",
1619 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1626 Value *
X = Sel->getTrueValue(), *
Y = Sel->getFalseValue();
1628 return B.CreateSelect(Sel->getCondition(),
1629 B.CreateBinOp(
Op,
X, Z),
1630 B.CreateBinOp(
Op,
Y, Z));
1635 Value *
Y = Sel->getTrueValue(), *
Z = Sel->getFalseValue();
1636 return B.CreateSelect(Sel->getCondition(),
1637 B.CreateBinOp(
Op,
X,
Y),
1638 B.CreateBinOp(
Op,
X, Z));
1642 S.addRule(
"fold select-select",
1645 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1652 if (Sel0->getCondition() ==
C)
1656 if (Sel1->getCondition() ==
C)
1657 return B.CreateSelect(
C, Sel->
getTrueValue(), Sel1->getFalseValue());
1661 S.addRule(
"or-signbit -> xor-signbit",
1663 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1664 if (
I->getOpcode() != Instruction::Or)
1671 return IRBuilder<>(Ctx).CreateXor(
I->getOperand(0), Msb);
1673 S.addRule(
"sink lshr into binop",
1675 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1676 if (
I->getOpcode() != Instruction::LShr)
1682 case Instruction::And:
1683 case Instruction::Or:
1684 case Instruction::Xor:
1690 Value *S =
I->getOperand(1);
1695 S.addRule(
"expose bitop-const",
1697 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1698 auto IsBitOp = [](
unsigned Op) ->
bool {
1700 case Instruction::And:
1701 case Instruction::Or:
1702 case Instruction::Xor:
1708 if (!BitOp1 || !IsBitOp(BitOp1->
getOpcode()))
1711 if (!BitOp2 || !IsBitOp(BitOp2->
getOpcode()))
1722 S.addRule(
"select with trunc cond to select with icmp cond",
1725 [](Instruction *
I, LLVMContext &Ctx) ->
Value * {
1731 using namespace PatternMatch;
1737 Type *Ty =
X->getType();
1738 Value *
And =
B.CreateAnd(
X, ConstantInt::get(Ty, 1));
1740 : ICmpInst::ICMP_EQ,
1741 And, ConstantInt::get(Ty, 0));
1747void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1748 S.addRule(
"(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1749 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1750 if (
I->getOpcode() != Instruction::And)
1756 if (
Xor->getOpcode() != Instruction::Xor)
1761 if (!And0 || And0->
getOpcode() != Instruction::And)
1768 if (V0 != (V0 & V1))
1771 return B.CreateAnd(
B.CreateXor(And0->
getOperand(0), And1), C0);
1775bool PolynomialMultiplyRecognize::recognize() {
1776 LLVM_DEBUG(
dbgs() <<
"Starting PolynomialMultiplyRecognize on loop\n"
1777 << *CurLoop <<
'\n');
1786 if (LoopB != CurLoop->getLoopLatch())
1789 if (ExitB ==
nullptr)
1791 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1792 if (EntryB ==
nullptr)
1795 unsigned IterCount = 0;
1796 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1800 IterCount = CV->getValue()->getZExtValue() + 1;
1802 Value *CIV = getCountIV(LoopB);
1807 PV.IterCount = IterCount;
1808 LLVM_DEBUG(
dbgs() <<
"Loop IV: " << *CIV <<
"\nIterCount: " << IterCount
1811 setupPreSimplifier(PreSimp);
1819 bool FoundPreScan =
false;
1820 auto FeedsPHI = [LoopB](
const Value *
V) ->
bool {
1821 for (
const Value *U :
V->users()) {
1823 if (
P->getParent() == LoopB)
1828 for (Instruction &In : *LoopB) {
1830 if (!SI || !FeedsPHI(SI))
1833 Simplifier::Context
C(SI);
1834 Value *
T = PreSimp.simplify(
C);
1836 LLVM_DEBUG(
dbgs() <<
"scanSelect(pre-scan): " << PE(
C, SelI) <<
'\n');
1837 if (scanSelect(SelI, LoopB, EntryB, CIV, PV,
true)) {
1838 FoundPreScan =
true;
1840 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1841 SI->replaceAllUsesWith(NewSel);
1848 if (!FoundPreScan) {
1858 if (!promoteTypes(LoopB, ExitB))
1861 Simplifier PostSimp;
1862 setupPostSimplifier(PostSimp);
1863 for (Instruction &In : *LoopB) {
1865 if (!SI || !FeedsPHI(SI))
1867 Simplifier::Context
C(SI);
1868 Value *
T = PostSimp.simplify(
C);
1871 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1872 SI->replaceAllUsesWith(NewSel);
1878 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1880 cleanupLoopBody(LoopB);
1884 bool FoundScan =
false;
1885 for (Instruction &In : *LoopB) {
1890 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV,
false);
1897 StringRef PP = (PV.M ?
"(P+M)" :
"P");
1899 dbgs() <<
"Found pmpy idiom: R = " << PP <<
".Q\n";
1901 dbgs() <<
"Found inverse pmpy idiom: R = (" << PP <<
"/Q).Q) + "
1903 dbgs() <<
" Res:" << *PV.Res <<
"\n P:" << *PV.P <<
"\n";
1905 dbgs() <<
" M:" << *PV.M <<
"\n";
1906 dbgs() <<
" Q:" << *PV.Q <<
"\n";
1907 dbgs() <<
" Iteration count:" << PV.IterCount <<
"\n";
1911 Value *PM = generate(At, PV);
1915 if (PM->
getType() != PV.Res->getType())
1916 PM =
IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(),
false);
1918 PV.Res->replaceAllUsesWith(PM);
1919 PV.Res->eraseFromParent();
1923int HexagonLoopIdiomRecognize::getSCEVStride(
const SCEVAddRecExpr *S) {
1925 return SC->getAPInt().getSExtValue();
1929bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1934 Value *StoredVal =
SI->getValueOperand();
1935 Value *StorePtr =
SI->getPointerOperand();
1938 uint64_t SizeInBits =
DL->getTypeSizeInBits(StoredVal->
getType());
1939 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1946 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1951 int Stride = getSCEVStride(StoreEv);
1954 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1955 if (StoreSize !=
unsigned(std::abs(Stride)))
1968 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1972 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1984 const SCEV *BECount,
unsigned StoreSize,
2004 for (
auto *
B : L->blocks())
2006 if (Ignored.
count(&
I) == 0 &&
2013void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2014 SmallVectorImpl<StoreInst*> &Stores) {
2016 for (Instruction &
I : *BB)
2018 if (isLegalStore(CurLoop, SI))
2022bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2023 StoreInst *SI,
const SCEV *BECount) {
2025 "Expected only non-volatile stores, or Hexagon-specific memcpy"
2026 "to volatile destination.");
2028 Value *StorePtr =
SI->getPointerOperand();
2030 unsigned Stride = getSCEVStride(StoreEv);
2031 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
2032 if (Stride != StoreSize)
2047 SCEVExpander Expander(*SE,
"hexagon-loop-idiom");
2049 Type *IntPtrTy = Builder.getIntPtrTy(*
DL,
SI->getPointerAddressSpace());
2057 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2058 Builder.getPtrTy(
SI->getPointerAddressSpace()), ExpPt);
2059 Value *LoadBasePtr =
nullptr;
2061 bool Overlap =
false;
2062 bool DestVolatile =
SI->isVolatile();
2068 if (StoreSize != 4 ||
DL->getTypeSizeInBits(BECountTy) > 32) {
2072 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2074 StoreBasePtr =
nullptr;
2078 LoadBasePtr =
nullptr;
2084 SmallPtrSet<Instruction*, 2> Ignore1;
2087 StoreSize, *AA, Ignore1)) {
2091 BECount, StoreSize, *AA, Ignore1)) {
2093 goto CleanupAndExit;
2101 goto CleanupAndExit;
2106 if (
Func->hasFnAttribute(Attribute::AlwaysInline))
2107 goto CleanupAndExit;
2113 SmallVector<Instruction*,2> Insts;
2116 if (!coverLoop(CurLoop, Insts))
2117 goto CleanupAndExit;
2120 goto CleanupAndExit;
2123 goto CleanupAndExit;
2128 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2131 SmallPtrSet<Instruction*, 2> Ignore2;
2134 StoreSize, *AA, Ignore2))
2135 goto CleanupAndExit;
2138 bool StridePos = getSCEVStride(LoadEv) >= 0;
2141 if (!StridePos && DestVolatile)
2142 goto CleanupAndExit;
2144 bool RuntimeCheck = (Overlap || DestVolatile);
2149 SmallVector<BasicBlock*, 8> ExitBlocks;
2151 if (ExitBlocks.
size() != 1)
2152 goto CleanupAndExit;
2153 ExitB = ExitBlocks[0];
2158 LLVMContext &Ctx =
SI->getContext();
2159 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2162 const SCEV *NumBytesS =
2163 SE->getAddExpr(BECount, SE->getOne(IntPtrTy),
SCEV::FlagNUW);
2165 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2167 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2177 uint64_t
C = CI->getZExtValue();
2178 if (Threshold != 0 &&
C < Threshold)
2179 goto CleanupAndExit;
2181 goto CleanupAndExit;
2186 Loop *ParentL = LF->getLoopFor(Preheader);
2187 StringRef HeaderName = Header->getName();
2196 for (
auto &In : *Header) {
2204 DT->addNewBlock(NewPreheader, Preheader);
2205 DT->changeImmediateDominator(Header, NewPreheader);
2213 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2214 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2215 Value *LowA = StridePos ? SA : LA;
2216 Value *HighA = StridePos ? LA : SA;
2217 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2222 Value *Dist = Builder.CreateSub(LowA, HighA);
2223 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2224 Value *CmpEither = Builder.CreateOr(
Cond, CmpD);
2227 if (Threshold != 0) {
2229 Value *Thr = ConstantInt::get(Ty, Threshold);
2230 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2231 Value *CmpBoth = Builder.CreateAnd(
Cond, CmpB);
2235 Func, NewPreheader);
2239 Builder.CreateCondBr(
Cond, MemmoveB, NewPreheader);
2242 DT->addNewBlock(MemmoveB, Preheader);
2246 ExitD = DT->findNearestCommonDominator(ExitD,
PB);
2254 if (ExitD && DT->dominates(Preheader, ExitD)) {
2262 CondBuilder.CreateBr(ExitB);
2267 Type *PtrTy = PointerType::get(Ctx, 0);
2268 Type *VoidTy = Type::getVoidTy(Ctx);
2272 StringRef HexagonVolatileMemcpyName =
2274 RTLIB::impl_hexagon_memcpy_forward_vp4cp4n2);
2275 FunctionCallee Fn =
M->getOrInsertFunction(
2276 HexagonVolatileMemcpyName, VoidTy, PtrTy, PtrTy,
Int32Ty);
2278 const SCEV *OneS = SE->getConstant(
Int32Ty, 1);
2279 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount,
Int32Ty);
2280 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS,
SCEV::FlagNUW);
2281 Value *NumWords = Expander.expandCodeFor(NumWordsS,
Int32Ty,
2287 NewCall = CondBuilder.CreateCall(Fn,
2288 {StoreBasePtr, LoadBasePtr, NumWords});
2290 NewCall = CondBuilder.CreateMemMove(
2291 StoreBasePtr,
SI->getAlign(), LoadBasePtr, LI->
getAlign(), NumBytes);
2294 NewCall = Builder.CreateMemCpy(StoreBasePtr,
SI->getAlign(), LoadBasePtr,
2303 LLVM_DEBUG(
dbgs() <<
" Formed " << (Overlap ?
"memmove: " :
"memcpy: ")
2305 <<
" from load ptr=" << *LoadEv <<
" at: " << *LI <<
"\n"
2306 <<
" from store ptr=" << *StoreEv <<
" at: " << *SI
2315bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2316 SmallVectorImpl<Instruction*> &Insts)
const {
2317 SmallPtrSet<BasicBlock *, 8> LoopBlocks;
2326 for (
unsigned i = 0; i < Worklist.size(); ++i) {
2328 for (
auto I =
In->op_begin(),
E =
In->op_end();
I !=
E; ++
I) {
2335 Worklist.insert(OpI);
2343 for (
auto *
B :
L->blocks()) {
2344 for (
auto &In : *
B) {
2347 if (!Worklist.count(&In) &&
In.mayHaveSideEffects())
2349 for (
auto *K :
In.users()) {
2354 if (LF->getLoopFor(UseB) != L)
2366bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2367 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2371 auto DominatedByBB = [
this,BB] (
BasicBlock *EB) ->
bool {
2372 return DT->dominates(BB, EB);
2374 if (!
all_of(ExitBlocks, DominatedByBB))
2377 bool MadeChange =
false;
2379 SmallVector<StoreInst*,8> Stores;
2380 collectStores(CurLoop, BB, Stores);
2383 for (
auto &SI : Stores)
2384 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2389bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2390 PolynomialMultiplyRecognize PMR(L, *
DL, *DT, *TLI, *SE);
2391 if (PMR.recognize())
2394 if (!HasMemcpy && !HasMemmove)
2397 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2399 "runOnCountableLoop() called on a loop without a predictable"
2400 "backedge-taken count");
2402 SmallVector<BasicBlock *, 8> ExitBlocks;
2403 L->getUniqueExitBlocks(ExitBlocks);
2408 for (
auto *BB :
L->getBlocks()) {
2410 if (LF->getLoopFor(BB) != L)
2412 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2418bool HexagonLoopIdiomRecognize::run(Loop *L) {
2419 const Module &
M = *
L->getHeader()->getParent()->getParent();
2425 if (!
L->getLoopPreheader())
2429 StringRef
Name =
L->getHeader()->getParent()->getName();
2430 if (Name ==
"memset" || Name ==
"memcpy" || Name ==
"memmove")
2433 DL = &
L->getHeader()->getDataLayout();
2435 HasMemcpy = TLI->has(LibFunc_memcpy);
2436 HasMemmove = TLI->has(LibFunc_memmove);
2438 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2439 return runOnCountableLoop(L);
2443bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
2444 LPPassManager &LPM) {
2448 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2449 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2450 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2451 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2452 *
L->getHeader()->getParent());
2453 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2454 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2458 return new HexagonLoopIdiomRecognizeLegacyPass();
2465 return HexagonLoopIdiomRecognize(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
TLI, &AR.
SE)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ATTRIBUTE_USED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static cl::opt< unsigned > SimplifyLimit("hlir-simplify-limit", cl::init(10000), cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"))
static cl::opt< bool > DisableMemcpyIdiom("disable-memcpy-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memcpy in loop idiom recognition"))
static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB)
static cl::opt< unsigned > RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " "check guarding the memmove."))
static cl::opt< bool > HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), cl::desc("Enable Hexagon-specific memcpy for volatile destination."))
static cl::opt< bool > DisableMemmoveIdiom("disable-memmove-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memmove in loop idiom recognition"))
static cl::opt< unsigned > CompileTimeMemSizeThreshold("compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), cl::desc("Threshold (in bytes) to perform the transformation, if the " "runtime loop count (mem transfer size) is known at compile-time."))
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
static bool hasZeroSignBit(const Value *V)
static cl::opt< bool > OnlyNonNestedMemmove("only-nonnested-memmove-idiom", cl::Hidden, cl::init(true), cl::desc("Only enable generating memmove in non-nested loops"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Move duplicate certain instructions close to their use
This header provides classes for managing per-loop analyses.
Machine Check Debug Module
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing function, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
void setIDom(DomTreeNodeBase *NewIDom)
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
Representation for a specific memory location.
void setIncomingBlock(unsigned i, BasicBlock *BB)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class represents a constant integer value.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const Value * getFalseValue() const
const Value * getCondition() const
const Value * getTrueValue() const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
Provides information about what library functions are available for the current target.
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
LLVM_ABI void link(std::unique_ptr< LinkGraph > G, std::unique_ptr< JITLinkContext > Ctx)
Link the given graph.
NodeAddr< UseNode * > Use
NodeAddr< FuncNode * > Func
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto pred_end(const MachineBasicBlock *BB)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
constexpr from_range_t from_range
Pass * createHexagonLoopIdiomPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI char & LoopSimplifyID
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
bool isModOrRefSet(const ModRefInfo MRI)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
PredIterator< BasicBlock, Value::user_iterator > pred_iterator
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.