29 #include "llvm/IR/IntrinsicsHexagon.h" 51 #define DEBUG_TYPE "hexagon-vlcr" 53 STATISTIC(HexagonNumVectorLoopCarriedReuse,
54 "Number of values that were reused from a previous iteration.");
58 cl::desc(
"Maximum distance of loop carried dependences that are handled"),
74 ChainOfDependences Chain;
77 bool isIdentical(DepChain &
Other)
const {
80 ChainOfDependences &OtherChain =
Other.getChain();
81 for (
int i = 0; i <
size(); ++i) {
82 if (Chain[i] != OtherChain[i])
88 ChainOfDependences &getChain() {
104 int iterations()
const {
109 return Chain.front();
125 const ChainOfDependences &CD =
D.Chain;
126 int ChainSize = CD.size();
127 OS <<
"**DepChain Start::**\n";
128 for (
int i = 0; i < ChainSize -1; ++i) {
129 OS << *(CD[i]) <<
" -->\n";
131 OS << *CD[ChainSize-1] <<
"\n";
142 std::map<Instruction *, DepChain *> DepChains;
145 ReuseValue() =
default;
148 Inst2Replace =
nullptr;
149 BackedgeInst =
nullptr;
153 bool isDefined() {
return Inst2Replace !=
nullptr; }
158 OS <<
"** ReuseValue ***\n";
159 OS <<
"Instruction to Replace: " << *(RU.Inst2Replace) <<
"\n";
160 OS <<
"Backedge Instruction: " << *(RU.BackedgeInst) <<
"\n";
164 class HexagonVectorLoopCarriedReuseLegacyPass :
public LoopPass {
168 explicit HexagonVectorLoopCarriedReuseLegacyPass() :
LoopPass(
ID) {
174 return "Hexagon-specific loop carried reuse for HVX vectors";
187 class HexagonVectorLoopCarriedReuse {
189 HexagonVectorLoopCarriedReuse(
Loop *L) : CurLoop(L){};
195 std::set<Instruction *> ReplacedInsts;
197 ReuseValue ReuseCandidate;
200 void findLoopCarriedDeps();
201 void findValueToReuse();
216 "Hexagon-specific predictive commoning for HVX vectors",
221 "Hexagon-specific predictive commoning
for HVX
vectors",
228 HexagonVectorLoopCarriedReuse Vlcr(&L);
236 bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(
Loop *L,
240 HexagonVectorLoopCarriedReuse Vlcr(L);
244 bool HexagonVectorLoopCarriedReuse::run() {
245 if (!CurLoop->getLoopPreheader())
249 if (!CurLoop->getSubLoops().empty())
253 if (CurLoop->getNumBlocks() != 1)
259 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(
CallInst *
C) {
261 case Intrinsic::hexagon_V6_vaddb:
262 case Intrinsic::hexagon_V6_vaddb_128B:
263 case Intrinsic::hexagon_V6_vaddh:
264 case Intrinsic::hexagon_V6_vaddh_128B:
265 case Intrinsic::hexagon_V6_vaddw:
266 case Intrinsic::hexagon_V6_vaddw_128B:
267 case Intrinsic::hexagon_V6_vaddubh:
268 case Intrinsic::hexagon_V6_vaddubh_128B:
269 case Intrinsic::hexagon_V6_vadduhw:
270 case Intrinsic::hexagon_V6_vadduhw_128B:
271 case Intrinsic::hexagon_V6_vaddhw:
272 case Intrinsic::hexagon_V6_vaddhw_128B:
273 case Intrinsic::hexagon_V6_vmaxb:
274 case Intrinsic::hexagon_V6_vmaxb_128B:
275 case Intrinsic::hexagon_V6_vmaxh:
276 case Intrinsic::hexagon_V6_vmaxh_128B:
277 case Intrinsic::hexagon_V6_vmaxw:
278 case Intrinsic::hexagon_V6_vmaxw_128B:
279 case Intrinsic::hexagon_V6_vmaxub:
280 case Intrinsic::hexagon_V6_vmaxub_128B:
281 case Intrinsic::hexagon_V6_vmaxuh:
282 case Intrinsic::hexagon_V6_vmaxuh_128B:
283 case Intrinsic::hexagon_V6_vminub:
284 case Intrinsic::hexagon_V6_vminub_128B:
285 case Intrinsic::hexagon_V6_vminuh:
286 case Intrinsic::hexagon_V6_vminuh_128B:
287 case Intrinsic::hexagon_V6_vminb:
288 case Intrinsic::hexagon_V6_vminb_128B:
289 case Intrinsic::hexagon_V6_vminh:
290 case Intrinsic::hexagon_V6_vminh_128B:
291 case Intrinsic::hexagon_V6_vminw:
292 case Intrinsic::hexagon_V6_vminw_128B:
293 case Intrinsic::hexagon_V6_vmpyub:
294 case Intrinsic::hexagon_V6_vmpyub_128B:
295 case Intrinsic::hexagon_V6_vmpyuh:
296 case Intrinsic::hexagon_V6_vmpyuh_128B:
297 case Intrinsic::hexagon_V6_vavgub:
298 case Intrinsic::hexagon_V6_vavgub_128B:
299 case Intrinsic::hexagon_V6_vavgh:
300 case Intrinsic::hexagon_V6_vavgh_128B:
301 case Intrinsic::hexagon_V6_vavguh:
302 case Intrinsic::hexagon_V6_vavguh_128B:
303 case Intrinsic::hexagon_V6_vavgw:
304 case Intrinsic::hexagon_V6_vavgw_128B:
305 case Intrinsic::hexagon_V6_vavgb:
306 case Intrinsic::hexagon_V6_vavgb_128B:
307 case Intrinsic::hexagon_V6_vavguw:
308 case Intrinsic::hexagon_V6_vavguw_128B:
309 case Intrinsic::hexagon_V6_vabsdiffh:
310 case Intrinsic::hexagon_V6_vabsdiffh_128B:
311 case Intrinsic::hexagon_V6_vabsdiffub:
312 case Intrinsic::hexagon_V6_vabsdiffub_128B:
313 case Intrinsic::hexagon_V6_vabsdiffuh:
314 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
315 case Intrinsic::hexagon_V6_vabsdiffw:
316 case Intrinsic::hexagon_V6_vabsdiffw_128B:
323 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(
Instruction *I1,
331 if (
CallInst *C1 = dyn_cast<CallInst>(I1)) {
332 if (
CallInst *C2 = dyn_cast<CallInst>(I2)) {
333 if (C1->getCalledFunction() != C2->getCalledFunction())
342 for (
unsigned i = 0; i < NumOperands; ++i) {
355 bool HexagonVectorLoopCarriedReuse::canReplace(
Instruction *
I) {
361 case Intrinsic::hexagon_V6_hi:
362 case Intrinsic::hexagon_V6_lo:
363 case Intrinsic::hexagon_V6_hi_128B:
364 case Intrinsic::hexagon_V6_lo_128B:
365 LLVM_DEBUG(
dbgs() <<
"Not considering for reuse: " << *II <<
"\n");
371 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372 for (
auto *
D : Dependences) {
373 LLVM_DEBUG(
dbgs() <<
"Processing dependence " << *(
D->front()) <<
"\n");
377 <<
".. Skipping because number of iterations > than the limit\n");
381 PHINode *PN = cast<PHINode>(
D->front());
383 int Iters =
D->iterations();
386 <<
" can be reused\n");
393 if (
User->getParent() != BB)
395 if (ReplacedInsts.count(
User)) {
397 <<
" has already been replaced. Skipping...\n");
400 if (isa<PHINode>(
User))
402 if (
User->mayHaveSideEffects())
404 if (!canReplace(
User))
418 for (
auto UI = BEInst->use_begin(),
E = BEInst->use_end(); UI !=
E;
425 if (!isEquivalentOperation(
I, BEUser))
428 int NumOperands =
I->getNumOperands();
439 std::map<Instruction *, DepChain *> DepChains;
441 if ((
I &&
I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
443 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
447 for (
int T = 0;
T < NumOperands; ++
T) {
449 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
450 if (!OpInst && !BEOpInst) {
457 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
460 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
464 DepChains[OpInst] =
D;
475 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
489 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
490 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
493 DepChains[OpInst] =
D;
502 ReuseCandidate.Inst2Replace =
I;
503 ReuseCandidate.BackedgeInst = BEUser;
504 ReuseCandidate.DepChains = DepChains;
505 ReuseCandidate.Iterations = Iters;
508 ReuseCandidate.reset();
512 ReuseCandidate.reset();
515 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(
Value *
Op,
523 void HexagonVectorLoopCarriedReuse::reuseValue() {
525 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
528 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
529 int Iterations = ReuseCandidate.Iterations;
530 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
531 assert(!DepChains.empty() &&
"No DepChains");
532 LLVM_DEBUG(
dbgs() <<
"reuseValue is making the following changes\n");
535 for (
int i = 0; i < Iterations; ++i) {
538 for (
int j = 0; j < NumOperands; ++j) {
543 DepChain &
D = *DepChains[
I];
547 Value *ValInPreheader = findValueInBlock(
D[i], LoopPH);
548 InstInPreheader->
setOperand(j, ValInPreheader);
550 InstsInPreheader.
push_back(InstInPreheader);
551 InstInPreheader->
setName(Inst2Replace->
getName() +
".hexagon.vlcr");
559 Value *BEVal = BEInst;
561 for (
int i = Iterations-1; i >=0 ; --i) {
562 Instruction *InstInPreheader = InstsInPreheader[i];
563 NewPhi = IRB.CreatePHI(InstInPreheader->
getType(), 2);
573 ReplacedInsts.insert(Inst2Replace);
574 ++HexagonNumVectorLoopCarriedReuse;
577 bool HexagonVectorLoopCarriedReuse::doVLCR() {
578 assert(CurLoop->getSubLoops().empty() &&
579 "Can do VLCR on the innermost loop only");
580 assert((CurLoop->getNumBlocks() == 1) &&
581 "Can do VLCR only on single block loops");
583 bool Changed =
false;
586 LLVM_DEBUG(
dbgs() <<
"Working on Loop: " << *CurLoop->getHeader() <<
"\n");
592 findLoopCarriedDeps();
594 if (ReuseCandidate.isDefined()) {
604 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(
Instruction *
I,
612 if (NumIncomingValues != 2) {
618 if (BB != CurLoop->getHeader()) {
624 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
627 assert(BEInst &&
"There should be a value over the backedge");
631 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
636 findDepChainFromPHI(BEInst,
D);
640 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(
Instruction *I1,
643 for (
auto *
D : Dependences) {
644 if (
D->front() == I1 &&
D->back() == I2 &&
D->iterations() == Iters)
650 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
652 for (
auto I = BB->
begin(),
E = BB->
end();
I !=
E && isa<PHINode>(
I); ++
I) {
653 auto *PN = cast<PHINode>(
I);
654 if (!isa<VectorType>(PN->
getType()))
657 DepChain *
D =
new DepChain();
658 findDepChainFromPHI(PN, *
D);
660 Dependences.insert(
D);
664 LLVM_DEBUG(
dbgs() <<
"Found " << Dependences.size() <<
" dependences\n");
665 LLVM_DEBUG(
for (
size_t i = 0; i < Dependences.size();
666 ++i) {
dbgs() << *Dependences[i] <<
"\n"; });
670 return new HexagonVectorLoopCarriedReuseLegacyPass();
Pass interface - Implemented by all 'passes'.
INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr", "Hexagon-specific predictive commoning for HVX vectors", false, false) INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one.
This class represents lattice values for constants.
void push_back(const T &Elt)
This class represents a function call, abstracting a target machine's calling convention.
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2), cl::ZeroOrMore)
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
bool isVectorTy() const
True if this is an instance of VectorType.
This defines the Use class.
iterator begin()
Instruction iterator methods.
hexagon Hexagon specific predictive commoning for HVX vectors
A Use represents the edge between a Value definition and its users.
Hexagon Vector Loop Carried Reuse Pass.
void setName(const Twine &Name)
Change the name of the value.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Type * getType() const
All values are typed, get the type of this value.
AnalysisUsage & addPreservedID(const void *ID)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Value * getOperand(unsigned i) const
initializer< Ty > init(const Ty &Val)
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
A set of analyses that are preserved following a run of a transformation pass.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
LLVM Basic Block Representation.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Pass * createHexagonVectorLoopCarriedReuseLegacyPass()
Represent the analysis usage information of a pass.
#define LLVM_ATTRIBUTE_UNUSED
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
User * getUser() const
Returns the User that contains this Use.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
unsigned getNumOperands() const
This is the shared class of boolean and integer constants.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
AnalysisUsage & addRequiredID(const void *ID)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
void setPreservesCFG()
This function should be called by the pass, iff they do not:
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Represents analyses that only rely on functions' control flow.
static void clear(coro::Shape &Shape)
Represents a single loop in the control flow graph.
void preserveSet()
Mark an analysis set as preserved.
StringRef getName() const
Return a constant reference to the value's name.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM Value Representation.
A vector that has set insertion semantics.
This class implements an extremely fast bulk output stream that can only output to a stream.
StringRef - Represent a constant reference to a string, i.e.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
A container for analyses that lazily runs them and caches their results.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
A wrapper class for inspecting calls to intrinsic functions.
const BasicBlock * getParent() const
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)