29#include "llvm/IR/IntrinsicsHexagon.h"
51#define DEBUG_TYPE "hexagon-vlcr"
54 "Number of values that were reused from a previous iteration.");
58 cl::desc(
"Maximum distance of loop carried dependences that are handled"),
74 ChainOfDependences Chain;
77 bool isIdentical(DepChain &
Other)
const {
80 ChainOfDependences &OtherChain =
Other.getChain();
81 for (
int i = 0; i <
size(); ++i) {
82 if (Chain[i] != OtherChain[i])
88 ChainOfDependences &getChain() {
104 int iterations()
const {
109 return Chain.front();
125 const ChainOfDependences &CD =
D.Chain;
126 int ChainSize = CD.size();
127 OS <<
"**DepChain Start::**\n";
128 for (
int i = 0; i < ChainSize -1; ++i) {
129 OS << *(CD[i]) <<
" -->\n";
131 OS << *CD[ChainSize-1] <<
"\n";
142 std::map<Instruction *, DepChain *> DepChains;
145 ReuseValue() =
default;
148 Inst2Replace =
nullptr;
149 BackedgeInst =
nullptr;
153 bool isDefined() {
return Inst2Replace !=
nullptr; }
158 OS <<
"** ReuseValue ***\n";
159 OS <<
"Instruction to Replace: " << *(RU.Inst2Replace) <<
"\n";
160 OS <<
"Backedge Instruction: " << *(RU.BackedgeInst) <<
"\n";
164 class HexagonVectorLoopCarriedReuseLegacyPass :
public LoopPass {
168 explicit HexagonVectorLoopCarriedReuseLegacyPass() :
LoopPass(
ID) {
174 return "Hexagon-specific loop carried reuse for HVX vectors";
187 class HexagonVectorLoopCarriedReuse {
189 HexagonVectorLoopCarriedReuse(
Loop *L) : CurLoop(
L){};
195 std::set<Instruction *> ReplacedInsts;
197 ReuseValue ReuseCandidate;
200 void findLoopCarriedDeps();
201 void findValueToReuse();
213char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
216 "Hexagon-specific predictive commoning for HVX vectors",
228 HexagonVectorLoopCarriedReuse Vlcr(&L);
236bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(
Loop *L,
240 HexagonVectorLoopCarriedReuse Vlcr(L);
244bool HexagonVectorLoopCarriedReuse::run() {
245 if (!CurLoop->getLoopPreheader())
249 if (!CurLoop->getSubLoops().empty())
253 if (CurLoop->getNumBlocks() != 1)
259bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(
CallInst *
C) {
260 switch (
C->getCalledFunction()->getIntrinsicID()) {
261 case Intrinsic::hexagon_V6_vaddb:
262 case Intrinsic::hexagon_V6_vaddb_128B:
263 case Intrinsic::hexagon_V6_vaddh:
264 case Intrinsic::hexagon_V6_vaddh_128B:
265 case Intrinsic::hexagon_V6_vaddw:
266 case Intrinsic::hexagon_V6_vaddw_128B:
267 case Intrinsic::hexagon_V6_vaddubh:
268 case Intrinsic::hexagon_V6_vaddubh_128B:
269 case Intrinsic::hexagon_V6_vadduhw:
270 case Intrinsic::hexagon_V6_vadduhw_128B:
271 case Intrinsic::hexagon_V6_vaddhw:
272 case Intrinsic::hexagon_V6_vaddhw_128B:
273 case Intrinsic::hexagon_V6_vmaxb:
274 case Intrinsic::hexagon_V6_vmaxb_128B:
275 case Intrinsic::hexagon_V6_vmaxh:
276 case Intrinsic::hexagon_V6_vmaxh_128B:
277 case Intrinsic::hexagon_V6_vmaxw:
278 case Intrinsic::hexagon_V6_vmaxw_128B:
279 case Intrinsic::hexagon_V6_vmaxub:
280 case Intrinsic::hexagon_V6_vmaxub_128B:
281 case Intrinsic::hexagon_V6_vmaxuh:
282 case Intrinsic::hexagon_V6_vmaxuh_128B:
283 case Intrinsic::hexagon_V6_vminub:
284 case Intrinsic::hexagon_V6_vminub_128B:
285 case Intrinsic::hexagon_V6_vminuh:
286 case Intrinsic::hexagon_V6_vminuh_128B:
287 case Intrinsic::hexagon_V6_vminb:
288 case Intrinsic::hexagon_V6_vminb_128B:
289 case Intrinsic::hexagon_V6_vminh:
290 case Intrinsic::hexagon_V6_vminh_128B:
291 case Intrinsic::hexagon_V6_vminw:
292 case Intrinsic::hexagon_V6_vminw_128B:
293 case Intrinsic::hexagon_V6_vmpyub:
294 case Intrinsic::hexagon_V6_vmpyub_128B:
295 case Intrinsic::hexagon_V6_vmpyuh:
296 case Intrinsic::hexagon_V6_vmpyuh_128B:
297 case Intrinsic::hexagon_V6_vavgub:
298 case Intrinsic::hexagon_V6_vavgub_128B:
299 case Intrinsic::hexagon_V6_vavgh:
300 case Intrinsic::hexagon_V6_vavgh_128B:
301 case Intrinsic::hexagon_V6_vavguh:
302 case Intrinsic::hexagon_V6_vavguh_128B:
303 case Intrinsic::hexagon_V6_vavgw:
304 case Intrinsic::hexagon_V6_vavgw_128B:
305 case Intrinsic::hexagon_V6_vavgb:
306 case Intrinsic::hexagon_V6_vavgb_128B:
307 case Intrinsic::hexagon_V6_vavguw:
308 case Intrinsic::hexagon_V6_vavguw_128B:
309 case Intrinsic::hexagon_V6_vabsdiffh:
310 case Intrinsic::hexagon_V6_vabsdiffh_128B:
311 case Intrinsic::hexagon_V6_vabsdiffub:
312 case Intrinsic::hexagon_V6_vabsdiffub_128B:
313 case Intrinsic::hexagon_V6_vabsdiffuh:
314 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
315 case Intrinsic::hexagon_V6_vabsdiffw:
316 case Intrinsic::hexagon_V6_vabsdiffw_128B:
323bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(
Instruction *I1,
325 if (!
I1->isSameOperationAs(I2))
331 if (
CallInst *C1 = dyn_cast<CallInst>(I1)) {
332 if (
CallInst *C2 = dyn_cast<CallInst>(I2)) {
333 if (C1->getCalledFunction() != C2->getCalledFunction())
341 unsigned NumOperands =
I1->getNumOperands();
342 for (
unsigned i = 0; i < NumOperands; ++i) {
355bool HexagonVectorLoopCarriedReuse::canReplace(
Instruction *
I) {
360 switch (
II->getIntrinsicID()) {
361 case Intrinsic::hexagon_V6_hi:
362 case Intrinsic::hexagon_V6_lo:
363 case Intrinsic::hexagon_V6_hi_128B:
364 case Intrinsic::hexagon_V6_lo_128B:
371void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372 for (
auto *
D : Dependences) {
373 LLVM_DEBUG(
dbgs() <<
"Processing dependence " << *(
D->front()) <<
"\n");
377 <<
".. Skipping because number of iterations > than the limit\n");
381 PHINode *PN = cast<PHINode>(
D->front());
383 int Iters =
D->iterations();
386 <<
" can be reused\n");
392 if (
User->getParent() != BB)
394 if (ReplacedInsts.count(
User)) {
396 <<
" has already been replaced. Skipping...\n");
399 if (isa<PHINode>(
User))
401 if (
User->mayHaveSideEffects())
403 if (!canReplace(
User))
417 for (
Use &U : BEInst->
uses()) {
422 if (!isEquivalentOperation(
I, BEUser))
425 int NumOperands =
I->getNumOperands();
436 std::map<Instruction *, DepChain *> DepChains;
438 if ((
I &&
I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
440 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
444 for (
int T = 0;
T < NumOperands; ++
T) {
446 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
447 if (!OpInst && !BEOpInst) {
454 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
457 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
461 DepChains[OpInst] =
D;
472 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
486 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
487 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
490 DepChains[OpInst] =
D;
499 ReuseCandidate.Inst2Replace =
I;
500 ReuseCandidate.BackedgeInst = BEUser;
501 ReuseCandidate.DepChains = DepChains;
502 ReuseCandidate.Iterations = Iters;
505 ReuseCandidate.reset();
509 ReuseCandidate.reset();
512Value *HexagonVectorLoopCarriedReuse::findValueInBlock(
Value *
Op,
520void HexagonVectorLoopCarriedReuse::reuseValue() {
522 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
525 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
526 int Iterations = ReuseCandidate.Iterations;
527 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
528 assert(!DepChains.empty() &&
"No DepChains");
529 LLVM_DEBUG(
dbgs() <<
"reuseValue is making the following changes\n");
532 for (
int i = 0; i < Iterations; ++i) {
535 for (
int j = 0;
j < NumOperands; ++
j) {
540 DepChain &
D = *DepChains[
I];
544 Value *ValInPreheader = findValueInBlock(
D[i], LoopPH);
545 InstInPreheader->
setOperand(j, ValInPreheader);
547 InstsInPreheader.
push_back(InstInPreheader);
548 InstInPreheader->
setName(Inst2Replace->
getName() +
".hexagon.vlcr");
556 Value *BEVal = BEInst;
558 for (
int i = Iterations-1; i >=0 ; --i) {
559 Instruction *InstInPreheader = InstsInPreheader[i];
560 NewPhi = IRB.CreatePHI(InstInPreheader->
getType(), 2);
570 ReplacedInsts.insert(Inst2Replace);
571 ++HexagonNumVectorLoopCarriedReuse;
574bool HexagonVectorLoopCarriedReuse::doVLCR() {
575 assert(CurLoop->getSubLoops().empty() &&
576 "Can do VLCR on the innermost loop only");
577 assert((CurLoop->getNumBlocks() == 1) &&
578 "Can do VLCR only on single block loops");
580 bool Changed =
false;
583 LLVM_DEBUG(
dbgs() <<
"Working on Loop: " << *CurLoop->getHeader() <<
"\n");
589 findLoopCarriedDeps();
591 if (ReuseCandidate.isDefined()) {
601void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(
Instruction *
I,
609 if (NumIncomingValues != 2) {
615 if (BB != CurLoop->getHeader()) {
621 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
624 assert(BEInst &&
"There should be a value over the backedge");
628 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
633 findDepChainFromPHI(BEInst,
D);
637DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(
Instruction *I1,
640 for (
auto *
D : Dependences) {
641 if (
D->front() == I1 &&
D->back() == I2 &&
D->iterations() == Iters)
647void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
649 for (
auto I = BB->
begin(), E = BB->
end();
I != E && isa<PHINode>(
I); ++
I) {
650 auto *PN = cast<PHINode>(
I);
651 if (!isa<VectorType>(PN->
getType()))
654 DepChain *
D =
new DepChain();
655 findDepChainFromPHI(PN, *
D);
657 Dependences.insert(
D);
661 LLVM_DEBUG(
dbgs() <<
"Found " << Dependences.size() <<
" dependences\n");
666 return new HexagonVectorLoopCarriedReuseLegacyPass();
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_UNUSED
static void clear(coro::Shape &Shape)
std::optional< std::vector< StOtherPiece > > Other
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))
hexagon Hexagon specific predictive commoning for HVX vectors
This defines the Use class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
This class represents an Operation in the Expression.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
A vector that has set insertion semantics.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool isVectorTy() const
True if this is an instance of VectorType.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Pass * createHexagonVectorLoopCarriedReuseLegacyPass()
Hexagon Vector Loop Carried Reuse Pass.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...