42#include "llvm/IR/IntrinsicsHexagon.h"
76#define DEBUG_TYPE "hexagon-lir"
82 cl::desc(
"Disable generation of memcpy in loop idiom recognition"));
86 cl::desc(
"Disable generation of memmove in loop idiom recognition"));
90 "check guarding the memmove."));
94 cl::desc(
"Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
99 cl::desc(
"Only enable generating memmove in non-nested loops"));
103 cl::desc(
"Enable Hexagon-specific memcpy for volatile destination."));
109 =
"hexagon_memcpy_forward_vp4cp4n2";
121class HexagonLoopIdiomRecognize {
126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
139 bool runOnCountableLoop(
Loop *L);
147 bool HasMemcpy, HasMemmove;
150class HexagonLoopIdiomRecognizeLegacyPass :
public LoopPass {
154 explicit HexagonLoopIdiomRecognizeLegacyPass() :
LoopPass(
ID) {
160 return "Recognize Hexagon-specific loop idioms";
185 void addRule(
StringRef N,
const Rule::FuncType &
F) {
186 Rules.push_back(Rule(
N,
F));
190 struct WorkListType {
191 WorkListType() =
default;
193 void push_back(
Value *V) {
195 if (S.insert(V).second)
199 Value *pop_front_val() {
206 bool empty()
const {
return Q.empty(); }
209 std::deque<Value *> Q;
213 using ValueSetType = std::set<Value *>;
215 std::vector<Rule> Rules;
237 friend struct Simplifier;
242 template <
typename FuncT>
void traverse(
Value *V, FuncT
F);
243 void record(
Value *V);
245 void unuse(
Value *V);
258 PE(
const Simplifier::Context &c,
Value *v =
nullptr) :
C(c),
V(
v) {}
260 const Simplifier::Context &
C;
266 P.C.print(
OS,
P.V ?
P.V :
P.C.Root);
272char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
275 "Recognize Hexagon-specific loop idioms",
false,
false)
286template <typename FuncT>
287void Simplifier::Context::traverse(
Value *V, FuncT
F) {
292 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
293 if (!U || U->getParent())
297 for (
Value *
Op : U->operands())
303 const auto *
U = dyn_cast<const Instruction>(V);
305 OS <<
V <<
'(' << *
V <<
')';
309 if (
U->getParent()) {
311 U->printAsOperand(
OS,
true);
316 unsigned N =
U->getNumOperands();
319 OS <<
U->getOpcodeName();
320 for (
const Value *
Op :
U->operands()) {
328void Simplifier::Context::initialize(
Instruction *Exp) {
338 Value *
V = Q.pop_front_val();
342 if (isa<PHINode>(U) ||
U->getParent() !=
Block)
346 M.insert({
U,
U->clone()});
350 for (std::pair<Value*,Value*>
P : M) {
352 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
353 auto F =
M.find(
U->getOperand(i));
355 U->setOperand(i,
F->second);
359 auto R =
M.find(Exp);
367void Simplifier::Context::record(
Value *V) {
375void Simplifier::Context::use(
Value *V) {
383void Simplifier::Context::unuse(
Value *V) {
384 if (!isa<Instruction>(V) || cast<Instruction>(V)->
getParent() !=
nullptr)
405 Instruction *
U = dyn_cast<Instruction>(Q.pop_front_val());
407 if (!U ||
U->getParent())
409 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
412 U->setOperand(i, NewV);
422void Simplifier::Context::replace(
Value *OldV,
Value *NewV) {
438 Value *
V = Q.pop_front_val();
440 if (!U ||
U->getParent())
444 NewV = subst(NewV, V, DupV);
452 Root = subst(Root, OldV, NewV);
456void Simplifier::Context::cleanup() {
457 for (
Value *V : Clones) {
460 U->dropAllReferences();
463 for (
Value *V : Clones) {
474 if (!
I->isSameOperationAs(J))
477 return I->isIdenticalTo(J);
479 for (
unsigned i = 0, n =
I->getNumOperands(); i != n; ++i) {
483 auto *InI = dyn_cast<const Instruction>(OpI);
484 auto *InJ = dyn_cast<const Instruction>(OpJ);
486 if (!
equal(InI, InJ))
488 }
else if (InI != InJ || !InI)
500 Value *
V = Q.pop_front_val();
504 if (!U ||
U->getParent())
506 if (SubI &&
equal(SubI, U))
525 I->insertInto(
B, At);
530 if (
Instruction *RootI = dyn_cast<Instruction>(Root))
535Value *Simplifier::simplify(Context &
C) {
542 if (Count++ >= Limit)
544 Instruction *
U = dyn_cast<Instruction>(Q.pop_front_val());
545 if (!U ||
U->getParent() || !
C.Used.count(U))
547 bool Changed =
false;
548 for (Rule &R : Rules) {
563 return Count < Limit ?
C.Root :
nullptr;
574 class PolynomialMultiplyRecognize {
576 explicit PolynomialMultiplyRecognize(
Loop *loop,
const DataLayout &dl,
579 : CurLoop(loop),
DL(dl), DT(dt), TLI(tli), SE(se) {}
587 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
599 bool classifyInst(
Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
601 bool highBitsAreZero(
Value *V,
unsigned IterCount);
602 bool keepsHighBitsZero(
Value *V,
unsigned IterCount);
608 struct ParsedValues {
609 ParsedValues() =
default;
617 unsigned IterCount = 0;
623 bool matchRightShift(
SelectInst *SelI, ParsedValues &PV);
625 Value *CIV, ParsedValues &PV,
bool PreScan);
626 unsigned getInverseMxN(
unsigned QP);
629 void setupPreSimplifier(Simplifier &S);
630 void setupPostSimplifier(Simplifier &S);
643 if (std::distance(PI, PE) != 2)
647 for (
auto I = BB->
begin(), E = BB->
end();
I != E && isa<PHINode>(
I); ++
I) {
648 auto *PN = cast<PHINode>(
I);
649 Value *InitV = PN->getIncomingValueForBlock(
PB);
650 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->
isZero())
652 Value *IterV = PN->getIncomingValueForBlock(BB);
653 auto *BO = dyn_cast<BinaryOperator>(IterV);
656 if (BO->getOpcode() != Instruction::Add)
658 Value *IncV =
nullptr;
659 if (BO->getOperand(0) == PN)
660 IncV = BO->getOperand(1);
661 else if (BO->getOperand(1) == PN)
662 IncV = BO->getOperand(0);
666 if (
auto *
T = dyn_cast<ConstantInt>(IncV))
674 for (
auto UI =
I->user_begin(), UE =
I->user_end(); UI != UE;) {
675 Use &TheUse = UI.getUse();
677 if (
auto *
II = dyn_cast<Instruction>(TheUse.
getUser()))
678 if (BB ==
II->getParent())
679 II->replaceUsesOfWith(
I, J);
683bool PolynomialMultiplyRecognize::matchLeftShift(
SelectInst *SelI,
684 Value *CIV, ParsedValues &PV) {
696 using namespace PatternMatch;
699 Value *
A =
nullptr, *
B =
nullptr, *
C =
nullptr;
709 Value *
X =
nullptr, *Sh1 =
nullptr;
737 Value *ShouldSameV =
nullptr, *ShouldXoredV =
nullptr;
740 ShouldXoredV = FalseV;
742 ShouldSameV = FalseV;
743 ShouldXoredV = TrueV;
746 Value *Q =
nullptr, *
R =
nullptr, *
Y =
nullptr, *
Z =
nullptr;
752 if (ShouldSameV ==
Y)
754 else if (ShouldSameV == Z)
797bool PolynomialMultiplyRecognize::matchRightShift(
SelectInst *SelI,
810 using namespace PatternMatch;
837 Value *
R =
nullptr, *Q =
nullptr;
867bool PolynomialMultiplyRecognize::scanSelect(
SelectInst *SelI,
870 using namespace PatternMatch;
909 if (matchLeftShift(SelI, CIV, PV)) {
915 auto *RPhi = dyn_cast<PHINode>(PV.R);
918 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
924 if (CurLoop->isLoopInvariant(PV.X)) {
934 Value *Var =
nullptr, *Inv =
nullptr, *X1 =
nullptr, *X2 =
nullptr;
937 auto *
I1 = dyn_cast<Instruction>(X1);
938 auto *I2 = dyn_cast<Instruction>(X2);
939 if (!I1 ||
I1->getParent() != LoopB) {
942 }
else if (!I2 || I2->getParent() != LoopB) {
953 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
960 if (matchRightShift(SelI, PV)) {
963 if (PV.Inv && !isa<ConstantInt>(PV.Q))
974bool PolynomialMultiplyRecognize::isPromotableTo(
Value *Val,
992 switch (
In->getOpcode()) {
993 case Instruction::PHI:
994 case Instruction::ZExt:
995 case Instruction::And:
996 case Instruction::Or:
997 case Instruction::Xor:
998 case Instruction::LShr:
999 case Instruction::Select:
1000 case Instruction::Trunc:
1002 case Instruction::ICmp:
1003 if (
CmpInst *CI = cast<CmpInst>(In))
1004 return CI->isEquality() || CI->isUnsigned();
1006 case Instruction::Add:
1007 return In->hasNoSignedWrap() &&
In->hasNoUnsignedWrap();
1012void PolynomialMultiplyRecognize::promoteTo(
Instruction *In,
1014 Type *OrigTy =
In->getType();
1018 if (!
In->getType()->isIntegerTy(1))
1019 In->mutateType(DestTy);
1023 if (
PHINode *
P = dyn_cast<PHINode>(In)) {
1024 unsigned N =
P->getNumIncomingValues();
1025 for (
unsigned i = 0; i !=
N; ++i) {
1029 Value *InV =
P->getIncomingValue(i);
1032 if (Ty !=
P->getType()) {
1037 P->setIncomingValue(i, InV);
1040 }
else if (
ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1042 if (
Op->getType() ==
Z->getType())
1043 Z->replaceAllUsesWith(
Op);
1044 Z->eraseFromParent();
1047 if (
TruncInst *
T = dyn_cast<TruncInst>(In)) {
1051 T->replaceAllUsesWith(
And);
1052 T->eraseFromParent();
1057 for (
unsigned i = 0, n =
In->getNumOperands(); i != n; ++i) {
1058 if (
ConstantInt *CI = dyn_cast<ConstantInt>(
In->getOperand(i)))
1059 if (CI->getBitWidth() < DestBW)
1060 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1064bool PolynomialMultiplyRecognize::promoteTypes(
BasicBlock *LoopB,
1078 if (
P.getNumIncomingValues() != 1)
1080 assert(
P.getIncomingBlock(0) == LoopB);
1082 if (!
T ||
T->getBitWidth() > DestBW)
1088 if (!
In.isTerminator() && !isPromotableTo(&In, DestTy))
1092 std::vector<Instruction*> LoopIns;
1093 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1096 if (!
In->isTerminator())
1097 promoteTo(In, DestTy, LoopB);
1106 Type *Ty0 =
P->getIncomingValue(0)->getType();
1107 Type *PTy =
P->getType();
1115 P->replaceAllUsesWith(
T);
1118 cast<Instruction>(
T)->setOperand(0,
P);
1125bool PolynomialMultiplyRecognize::findCycle(
Value *Out,
Value *In,
1131 auto *BB = cast<Instruction>(Out)->
getParent();
1132 bool HadPhi =
false;
1134 for (
auto *U : Out->
users()) {
1135 auto *
I = dyn_cast<Instruction>(&*U);
1136 if (
I ==
nullptr ||
I->getParent() != BB)
1143 bool IsPhi = isa<PHINode>(
I);
1144 if (IsPhi && HadPhi)
1149 if (findCycle(
I, In,
Cycle))
1153 return !
Cycle.empty();
1156void PolynomialMultiplyRecognize::classifyCycle(
Instruction *DivI,
1157 ValueSeq &
Cycle, ValueSeq &Early, ValueSeq &Late) {
1164 for (
I = 0;
I <
N; ++
I) {
1168 else if (!isa<PHINode>(V))
1175 ValueSeq &
First = !IsE ? Early : Late;
1176 for (
unsigned J = 0; J <
I; ++J)
1179 ValueSeq &Second = IsE ? Early : Late;
1181 for (++
I;
I <
N; ++
I) {
1183 if (DivI == V || isa<PHINode>(V))
1192bool PolynomialMultiplyRecognize::classifyInst(
Instruction *UseI,
1193 ValueSeq &Early, ValueSeq &Late) {
1197 if (UseI->
getOpcode() == Instruction::Select) {
1199 if (Early.count(TV) || Early.count(FV)) {
1200 if (Late.count(TV) || Late.count(FV))
1203 }
else if (Late.count(TV) || Late.count(FV)) {
1204 if (Early.count(TV) || Early.count(FV))
1216 bool AE =
true,
AL =
true;
1218 if (Early.count(&*
I))
1220 else if (Late.count(&*
I))
1244bool PolynomialMultiplyRecognize::commutesWithShift(
Instruction *
I) {
1245 switch (
I->getOpcode()) {
1246 case Instruction::And:
1247 case Instruction::Or:
1248 case Instruction::Xor:
1249 case Instruction::LShr:
1250 case Instruction::Shl:
1251 case Instruction::Select:
1252 case Instruction::ICmp:
1253 case Instruction::PHI:
1261bool PolynomialMultiplyRecognize::highBitsAreZero(
Value *V,
1262 unsigned IterCount) {
1263 auto *
T = dyn_cast<IntegerType>(
V->getType());
1269 return Known.countMinLeadingZeros() >= IterCount;
1272bool PolynomialMultiplyRecognize::keepsHighBitsZero(
Value *V,
1273 unsigned IterCount) {
1276 if (
auto *
C = dyn_cast<ConstantInt>(V))
1277 return C->getValue().countl_zero() >= IterCount;
1279 if (
auto *
I = dyn_cast<Instruction>(V)) {
1280 switch (
I->getOpcode()) {
1281 case Instruction::And:
1282 case Instruction::Or:
1283 case Instruction::Xor:
1284 case Instruction::LShr:
1285 case Instruction::Select:
1286 case Instruction::ICmp:
1287 case Instruction::PHI:
1288 case Instruction::ZExt:
1297 unsigned Opc =
I->getOpcode();
1298 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1299 return Op !=
I->getOperand(1);
1303bool PolynomialMultiplyRecognize::convertShiftsToLeft(
BasicBlock *LoopB,
1305 Value *CIV = getCountIV(LoopB);
1308 auto *CIVTy = dyn_cast<IntegerType>(CIV->
getType());
1309 if (CIVTy ==
nullptr)
1313 ValueSeq Early, Late, Cycled;
1317 using namespace PatternMatch;
1323 if (!findCycle(&
I, V,
C))
1328 classifyCycle(&
I,
C, Early, Late);
1329 Cycled.insert(
C.begin(),
C.end());
1335 ValueSeq
Users(Cycled.begin(), Cycled.end());
1336 for (
unsigned i = 0; i <
Users.size(); ++i) {
1338 if (!isa<IntegerType>(
V->getType()))
1340 auto *
R = cast<Instruction>(V);
1343 if (!commutesWithShift(R))
1345 for (
User *U :
R->users()) {
1346 auto *
T = cast<Instruction>(U);
1350 if (
T->getParent() != LoopB || RShifts.count(
T) || isa<PHINode>(
T))
1354 if (!classifyInst(
T, Early, Late))
1365 for (
unsigned i = 0; i <
Internal.size(); ++i) {
1366 auto *
R = dyn_cast<Instruction>(Internal[i]);
1370 auto *
T = dyn_cast<Instruction>(
Op);
1371 if (
T &&
T->getParent() != LoopB)
1377 for (
Value *V : Inputs)
1378 if (!highBitsAreZero(V, IterCount))
1380 for (
Value *V : Internal)
1381 if (!keepsHighBitsZero(V, IterCount))
1386 std::map<Value*,Value*> ShiftMap;
1388 using CastMapType = std::map<std::pair<Value *, Type *>,
Value *>;
1390 CastMapType CastMap;
1394 auto H = CM.find(std::make_pair(V, Ty));
1397 Value *CV = IRB.CreateIntCast(V, Ty,
false);
1398 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1402 for (
auto I = LoopB->begin(), E = LoopB->end();
I != E; ++
I) {
1403 using namespace PatternMatch;
1405 if (isa<PHINode>(
I) || !
Users.count(&*
I))
1416 for (
auto &J :
I->operands()) {
1418 if (!isOperandShifted(&*
I,
Op))
1423 if (isa<ConstantInt>(
Op) && cast<ConstantInt>(
Op)->
isZero())
1426 auto F = ShiftMap.find(
Op);
1427 Value *
W = (
F != ShiftMap.end()) ?
F->second :
nullptr;
1429 IRB.SetInsertPoint(&*
I);
1433 Value *ShAmt = CIV, *ShVal =
Op;
1434 auto *VTy = cast<IntegerType>(ShVal->getType());
1435 auto *ATy = cast<IntegerType>(ShAmt->
getType());
1436 if (Late.count(&*
I))
1437 ShVal = IRB.CreateShl(
Op, ConstantInt::get(VTy, 1));
1441 if (VTy->getBitWidth() < ATy->getBitWidth())
1442 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1444 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1447 W = IRB.CreateShl(ShVal, ShAmt);
1448 ShiftMap.insert(std::make_pair(
Op, W));
1450 I->replaceUsesOfWith(
Op, W);
1460 for (
auto P = ExitB->
begin(), Q = ExitB->
end();
P != Q; ++
P) {
1461 if (!isa<PHINode>(
P))
1463 auto *PN = cast<PHINode>(
P);
1464 Value *
U = PN->getIncomingValueForBlock(LoopB);
1465 if (!
Users.count(U))
1467 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1473 cast<User>(S)->replaceUsesOfWith(S, PN);
1479void PolynomialMultiplyRecognize::cleanupLoopBody(
BasicBlock *LoopB) {
1480 for (
auto &
I : *LoopB)
1482 I.replaceAllUsesWith(SV);
1488unsigned PolynomialMultiplyRecognize::getInverseMxN(
unsigned QP) {
1491 std::array<char,32> Q,
C;
1493 for (
unsigned i = 0; i < 32; ++i) {
1510 for (
unsigned i = 1; i < 32; ++i) {
1518 for (
unsigned j = 0;
j < i; ++
j)
1519 T =
T ^ (
C[j] & Q[i-j]);
1524 for (
unsigned i = 0; i < 32; ++i)
1534 Module *
M = At->getParent()->getParent()->getParent();
1538 Value *
P = PV.P, *Q = PV.Q, *P0 =
P;
1539 unsigned IC = PV.IterCount;
1541 if (PV.M !=
nullptr)
1542 P0 =
P =
B.CreateXor(
P, PV.M);
1547 if (PV.IterCount != 32)
1548 P =
B.CreateAnd(
P, BMI);
1551 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1552 assert(QI && QI->getBitWidth() <= 32);
1555 unsigned M = (1 << PV.IterCount) - 1;
1556 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1557 unsigned QV = getInverseMxN(Tmp) &
M;
1558 auto *QVI = ConstantInt::get(QI->getType(), QV);
1559 P =
B.CreateCall(PMF, {
P, QVI});
1560 P =
B.CreateTrunc(
P, QI->getType());
1562 P =
B.CreateAnd(
P, BMI);
1565 Value *
R =
B.CreateCall(PMF, {
P, Q});
1567 if (PV.M !=
nullptr)
1568 R =
B.CreateXor(R,
B.CreateIntCast(P0,
R->getType(),
false));
1574 if (
const auto *CI = dyn_cast<const ConstantInt>(V))
1575 return CI->getValue().isNonNegative();
1579 switch (
I->getOpcode()) {
1580 case Instruction::LShr:
1581 if (
const auto SI = dyn_cast<const ConstantInt>(
I->getOperand(1)))
1582 return SI->getZExtValue() > 0;
1584 case Instruction::Or:
1585 case Instruction::Xor:
1588 case Instruction::And:
1595void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1596 S.addRule(
"sink-zext",
1599 if (
I->getOpcode() != Instruction::ZExt)
1604 switch (
T->getOpcode()) {
1605 case Instruction::And:
1606 case Instruction::Or:
1607 case Instruction::Xor:
1613 return B.CreateBinOp(cast<BinaryOperator>(
T)->
getOpcode(),
1614 B.CreateZExt(
T->getOperand(0),
I->getType()),
1615 B.CreateZExt(
T->getOperand(1),
I->getType()));
1617 S.addRule(
"xor/and -> and/xor",
1620 if (
I->getOpcode() != Instruction::Xor)
1622 Instruction *And0 = dyn_cast<Instruction>(
I->getOperand(0));
1623 Instruction *And1 = dyn_cast<Instruction>(
I->getOperand(1));
1626 if (And0->getOpcode() != Instruction::And ||
1627 And1->getOpcode() != Instruction::And)
1629 if (And0->getOperand(1) != And1->getOperand(1))
1632 return B.CreateAnd(
B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1633 And0->getOperand(1));
1635 S.addRule(
"sink binop into select",
1645 Value *
X = Sel->getTrueValue(), *
Y = Sel->getFalseValue();
1647 return B.CreateSelect(Sel->getCondition(),
1648 B.CreateBinOp(
Op,
X, Z),
1649 B.CreateBinOp(
Op,
Y, Z));
1654 Value *
Y = Sel->getTrueValue(), *
Z = Sel->getFalseValue();
1655 return B.CreateSelect(Sel->getCondition(),
1656 B.CreateBinOp(
Op,
X,
Y),
1657 B.CreateBinOp(
Op,
X, Z));
1661 S.addRule(
"fold select-select",
1671 if (Sel0->getCondition() ==
C)
1675 if (Sel1->getCondition() ==
C)
1676 return B.CreateSelect(
C, Sel->
getTrueValue(), Sel1->getFalseValue());
1680 S.addRule(
"or-signbit -> xor-signbit",
1683 if (
I->getOpcode() != Instruction::Or)
1685 ConstantInt *Msb = dyn_cast<ConstantInt>(
I->getOperand(1));
1692 S.addRule(
"sink lshr into binop",
1695 if (
I->getOpcode() != Instruction::LShr)
1701 case Instruction::And:
1702 case Instruction::Or:
1703 case Instruction::Xor:
1709 Value *S =
I->getOperand(1);
1714 S.addRule(
"expose bitop-const",
1717 auto IsBitOp = [](
unsigned Op) ->
bool {
1719 case Instruction::And:
1720 case Instruction::Or:
1721 case Instruction::Xor:
1727 if (!BitOp1 || !IsBitOp(BitOp1->
getOpcode()))
1730 if (!BitOp2 || !IsBitOp(BitOp2->
getOpcode()))
1743void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1744 S.addRule(
"(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1746 if (
I->getOpcode() != Instruction::And)
1749 ConstantInt *C0 = dyn_cast<ConstantInt>(
I->getOperand(1));
1752 if (
Xor->getOpcode() != Instruction::Xor)
1757 if (!And0 || And0->getOpcode() != Instruction::And)
1759 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1764 if (V0 != (V0 & V1))
1767 return B.CreateAnd(
B.CreateXor(And0->getOperand(0), And1), C0);
1771bool PolynomialMultiplyRecognize::recognize() {
1772 LLVM_DEBUG(
dbgs() <<
"Starting PolynomialMultiplyRecognize on loop\n"
1773 << *CurLoop <<
'\n');
1782 if (LoopB != CurLoop->getLoopLatch())
1785 if (ExitB ==
nullptr)
1787 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1788 if (EntryB ==
nullptr)
1791 unsigned IterCount = 0;
1792 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1793 if (isa<SCEVCouldNotCompute>(CT))
1795 if (
auto *CV = dyn_cast<SCEVConstant>(CT))
1796 IterCount = CV->getValue()->getZExtValue() + 1;
1798 Value *CIV = getCountIV(LoopB);
1801 PV.IterCount = IterCount;
1802 LLVM_DEBUG(
dbgs() <<
"Loop IV: " << *CIV <<
"\nIterCount: " << IterCount
1805 setupPreSimplifier(PreSimp);
1813 bool FoundPreScan =
false;
1814 auto FeedsPHI = [LoopB](
const Value *
V) ->
bool {
1815 for (
const Value *U :
V->users()) {
1816 if (
const auto *
P = dyn_cast<const PHINode>(U))
1817 if (
P->getParent() == LoopB)
1824 if (!SI || !FeedsPHI(SI))
1827 Simplifier::Context
C(SI);
1828 Value *
T = PreSimp.simplify(
C);
1829 SelectInst *SelI = (
T && isa<SelectInst>(
T)) ? cast<SelectInst>(
T) :
SI;
1830 LLVM_DEBUG(
dbgs() <<
"scanSelect(pre-scan): " << PE(
C, SelI) <<
'\n');
1831 if (scanSelect(SelI, LoopB, EntryB, CIV, PV,
true)) {
1832 FoundPreScan =
true;
1834 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1835 SI->replaceAllUsesWith(NewSel);
1842 if (!FoundPreScan) {
1852 if (!promoteTypes(LoopB, ExitB))
1855 Simplifier PostSimp;
1856 setupPostSimplifier(PostSimp);
1859 if (!SI || !FeedsPHI(SI))
1861 Simplifier::Context
C(SI);
1862 Value *
T = PostSimp.simplify(
C);
1863 SelectInst *SelI = dyn_cast_or_null<SelectInst>(
T);
1865 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1866 SI->replaceAllUsesWith(NewSel);
1872 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1874 cleanupLoopBody(LoopB);
1878 bool FoundScan =
false;
1880 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1884 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV,
false);
1893 dbgs() <<
"Found pmpy idiom: R = " << PP <<
".Q\n";
1895 dbgs() <<
"Found inverse pmpy idiom: R = (" << PP <<
"/Q).Q) + "
1897 dbgs() <<
" Res:" << *PV.Res <<
"\n P:" << *PV.P <<
"\n";
1899 dbgs() <<
" M:" << *PV.M <<
"\n";
1900 dbgs() <<
" Q:" << *PV.Q <<
"\n";
1901 dbgs() <<
" Iteration count:" << PV.IterCount <<
"\n";
1905 Value *PM = generate(At, PV);
1909 if (PM->
getType() != PV.Res->getType())
1913 PV.Res->eraseFromParent();
1917int HexagonLoopIdiomRecognize::getSCEVStride(
const SCEVAddRecExpr *S) {
1919 return SC->getAPInt().getSExtValue();
1923bool HexagonLoopIdiomRecognize::isLegalStore(
Loop *CurLoop,
StoreInst *SI) {
1928 Value *StoredVal =
SI->getValueOperand();
1929 Value *StorePtr =
SI->getPointerOperand();
1933 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1939 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1940 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1945 int Stride = getSCEVStride(StoreEv);
1948 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1949 if (StoreSize !=
unsigned(std::abs(Stride)))
1953 LoadInst *LI = dyn_cast<LoadInst>(
SI->getValueOperand());
1961 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1962 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1966 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1978 const SCEV *BECount,
unsigned StoreSize,
1988 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1998 for (
auto *
B : L->blocks())
2000 if (Ignored.
count(&
I) == 0 &&
2007void HexagonLoopIdiomRecognize::collectStores(
Loop *CurLoop,
BasicBlock *BB,
2012 if (isLegalStore(CurLoop, SI))
2016bool HexagonLoopIdiomRecognize::processCopyingStore(
Loop *CurLoop,
2019 "Expected only non-volatile stores, or Hexagon-specific memcpy"
2020 "to volatile destination.");
2022 Value *StorePtr =
SI->getPointerOperand();
2023 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2024 unsigned Stride = getSCEVStride(StoreEv);
2025 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
2026 if (Stride != StoreSize)
2032 auto *LI = cast<LoadInst>(
SI->getValueOperand());
2043 Type *IntPtrTy = Builder.getIntPtrTy(*
DL,
SI->getPointerAddressSpace());
2051 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2052 Builder.getPtrTy(
SI->getPointerAddressSpace()), ExpPt);
2053 Value *LoadBasePtr =
nullptr;
2055 bool Overlap =
false;
2056 bool DestVolatile =
SI->isVolatile();
2062 if (StoreSize != 4 ||
DL->getTypeSizeInBits(BECountTy) > 32) {
2066 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2068 StoreBasePtr =
nullptr;
2072 LoadBasePtr =
nullptr;
2081 StoreSize, *AA, Ignore1)) {
2085 BECount, StoreSize, *AA, Ignore1)) {
2087 goto CleanupAndExit;
2095 goto CleanupAndExit;
2100 if (
Func->hasFnAttribute(Attribute::AlwaysInline))
2101 goto CleanupAndExit;
2110 if (!coverLoop(CurLoop, Insts))
2111 goto CleanupAndExit;
2114 goto CleanupAndExit;
2117 goto CleanupAndExit;
2122 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2128 StoreSize, *AA, Ignore2))
2129 goto CleanupAndExit;
2132 bool StridePos = getSCEVStride(LoadEv) >= 0;
2135 if (!StridePos && DestVolatile)
2136 goto CleanupAndExit;
2138 bool RuntimeCheck = (Overlap || DestVolatile);
2145 if (ExitBlocks.
size() != 1)
2146 goto CleanupAndExit;
2147 ExitB = ExitBlocks[0];
2153 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2156 const SCEV *NumBytesS =
2157 SE->getAddExpr(BECount, SE->getOne(IntPtrTy),
SCEV::FlagNUW);
2159 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2161 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2162 if (
Instruction *In = dyn_cast<Instruction>(NumBytes))
2170 if (
ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2172 if (Threshold != 0 &&
C < Threshold)
2173 goto CleanupAndExit;
2175 goto CleanupAndExit;
2180 Loop *ParentL = LF->getLoopFor(Preheader);
2181 StringRef HeaderName = Header->getName();
2190 for (
auto &In : *Header) {
2191 PHINode *PN = dyn_cast<PHINode>(&In);
2207 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2208 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2209 Value *LowA = StridePos ? SA : LA;
2210 Value *HighA = StridePos ? LA : SA;
2211 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2216 Value *Dist = Builder.CreateSub(LowA, HighA);
2217 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2218 Value *CmpEither = Builder.CreateOr(
Cond, CmpD);
2221 if (Threshold != 0) {
2223 Value *Thr = ConstantInt::get(Ty, Threshold);
2224 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2225 Value *CmpBoth = Builder.CreateAnd(
Cond, CmpB);
2229 Func, NewPreheader);
2233 Builder.CreateCondBr(
Cond, MemmoveB, NewPreheader);
2248 if (ExitD && DT->
dominates(Preheader, ExitD)) {
2256 CondBuilder.CreateBr(ExitB);
2261 Type *PtrTy = PointerType::get(Ctx, 0);
2267 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2268 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2270 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2272 if (
Instruction *In = dyn_cast<Instruction>(NumWords))
2276 NewCall = CondBuilder.CreateCall(Fn,
2277 {StoreBasePtr, LoadBasePtr, NumWords});
2279 NewCall = CondBuilder.CreateMemMove(
2280 StoreBasePtr,
SI->getAlign(), LoadBasePtr, LI->
getAlign(), NumBytes);
2283 NewCall = Builder.CreateMemCpy(StoreBasePtr,
SI->getAlign(), LoadBasePtr,
2292 LLVM_DEBUG(
dbgs() <<
" Formed " << (Overlap ?
"memmove: " :
"memcpy: ")
2294 <<
" from load ptr=" << *LoadEv <<
" at: " << *LI <<
"\n"
2295 <<
" from store ptr=" << *StoreEv <<
" at: " << *SI
2304bool HexagonLoopIdiomRecognize::coverLoop(
Loop *L,
2307 for (
auto *
B :
L->blocks())
2316 for (
unsigned i = 0; i < Worklist.size(); ++i) {
2318 for (
auto I =
In->op_begin(), E =
In->op_end();
I != E; ++
I) {
2325 Worklist.insert(OpI);
2333 for (
auto *
B :
L->blocks()) {
2334 for (
auto &In : *
B) {
2335 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2337 if (!Worklist.count(&In) &&
In.mayHaveSideEffects())
2339 for (
auto *K :
In.users()) {
2344 if (LF->getLoopFor(UseB) != L)
2356bool HexagonLoopIdiomRecognize::runOnLoopBlock(
Loop *CurLoop,
BasicBlock *BB,
2361 auto DominatedByBB = [
this,BB] (
BasicBlock *EB) ->
bool {
2364 if (!
all_of(ExitBlocks, DominatedByBB))
2367 bool MadeChange =
false;
2370 collectStores(CurLoop, BB, Stores);
2373 for (
auto &SI : Stores)
2374 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2379bool HexagonLoopIdiomRecognize::runOnCountableLoop(
Loop *L) {
2380 PolynomialMultiplyRecognize PMR(L, *
DL, *DT, *TLI, *SE);
2381 if (PMR.recognize())
2384 if (!HasMemcpy && !HasMemmove)
2387 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2388 assert(!isa<SCEVCouldNotCompute>(BECount) &&
2389 "runOnCountableLoop() called on a loop without a predictable"
2390 "backedge-taken count");
2393 L->getUniqueExitBlocks(ExitBlocks);
2395 bool Changed =
false;
2398 for (
auto *BB :
L->getBlocks()) {
2400 if (LF->getLoopFor(BB) != L)
2402 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2408bool HexagonLoopIdiomRecognize::run(
Loop *L) {
2409 const Module &
M = *
L->getHeader()->getParent()->getParent();
2415 if (!
L->getLoopPreheader())
2420 if (
Name ==
"memset" ||
Name ==
"memcpy" ||
Name ==
"memmove")
2423 DL = &
L->getHeader()->getDataLayout();
2425 HasMemcpy = TLI->has(LibFunc_memcpy);
2426 HasMemmove = TLI->has(LibFunc_memmove);
2428 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2429 return runOnCountableLoop(L);
2433bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(
Loop *L,
2438 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2439 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2440 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2441 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2442 *
L->getHeader()->getParent());
2443 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2444 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2448 return new HexagonLoopIdiomRecognizeLegacyPass();
2455 return HexagonLoopIdiomRecognize(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
TLI, &AR.
SE)
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_ATTRIBUTE_USED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static cl::opt< unsigned > SimplifyLimit("hlir-simplify-limit", cl::init(10000), cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"))
hexagon loop Recognize Hexagon specific loop idioms
static cl::opt< bool > DisableMemcpyIdiom("disable-memcpy-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memcpy in loop idiom recognition"))
static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB)
static cl::opt< unsigned > RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " "check guarding the memmove."))
static cl::opt< bool > HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), cl::desc("Enable Hexagon-specific memcpy for volatile destination."))
static cl::opt< bool > DisableMemmoveIdiom("disable-memmove-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memmove in loop idiom recognition"))
static cl::opt< unsigned > CompileTimeMemSizeThreshold("compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), cl::desc("Threshold (in bytes) to perform the transformation, if the " "runtime loop count (mem transfer size) is known at compile-time."))
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
static const char * HexagonVolatileMemcpyName
static bool hasZeroSignBit(const Value *V)
static cl::opt< bool > OnlyNonNestedMemmove("only-nonnested-memmove-idiom", cl::Hidden, cl::init(true), cl::desc("Only enable generating memmove in non-nested loops"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Move duplicate certain instructions close to their use
This header provides classes for managing per-loop analyses.
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
void setIDom(DomTreeNodeBase *NewIDom)
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
A possibly irreducible generalization of a Loop.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
Represents a single loop in the control flow graph.
Representation for a specific memory location.
A Module instance is used to store all the information related to an LLVM module.
void setIncomingBlock(unsigned i, BasicBlock *BB)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getCondition() const
const Value * getTrueValue() const
A vector that has set insertion semantics.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
ArchType getArch() const
Get the parsed architecture type of this triple.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
This class represents zero extension of integer types.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
void link(std::unique_ptr< LinkGraph > G, std::unique_ptr< JITLinkContext > Ctx)
Link the given graph.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto pred_end(const MachineBasicBlock *BB)
Pass * createHexagonLoopIdiomPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto pred_begin(const MachineBasicBlock *BB)
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...