29#include "llvm/IR/IntrinsicsHexagon.h"
48#define DEBUG_TYPE "hexagon-vlcr"
51 "Number of values that were reused from a previous iteration.");
55 cl::desc(
"Maximum distance of loop carried dependences that are handled"),
71 ChainOfDependences Chain;
74 bool isIdentical(DepChain &
Other)
const {
77 ChainOfDependences &OtherChain =
Other.getChain();
78 for (
int i = 0; i <
size(); ++i) {
79 if (Chain[i] != OtherChain[i])
85 ChainOfDependences &getChain() {
101 int iterations()
const {
106 return Chain.front();
122 const ChainOfDependences &CD =
D.Chain;
123 int ChainSize = CD.size();
124 OS <<
"**DepChain Start::**\n";
125 for (
int i = 0; i < ChainSize -1; ++i) {
126 OS << *(CD[i]) <<
" -->\n";
128 OS << *CD[ChainSize-1] <<
"\n";
139 std::map<Instruction *, DepChain *> DepChains;
142 ReuseValue() =
default;
145 Inst2Replace =
nullptr;
146 BackedgeInst =
nullptr;
// Reports whether a reuse candidate is currently recorded: returns true
// exactly when Inst2Replace is non-null.
150 bool isDefined() {
return Inst2Replace !=
nullptr; }
155 OS <<
"** ReuseValue ***\n";
156 OS <<
"Instruction to Replace: " << *(RU.Inst2Replace) <<
"\n";
157 OS <<
"Backedge Instruction: " << *(RU.BackedgeInst) <<
"\n";
161 class HexagonVectorLoopCarriedReuseLegacyPass :
public LoopPass {
165 explicit HexagonVectorLoopCarriedReuseLegacyPass() :
LoopPass(
ID) {
171 return "Hexagon-specific loop carried reuse for HVX vectors";
184 class HexagonVectorLoopCarriedReuse {
// Constructs the reuse transformation for loop L by storing L in CurLoop.
// NOTE(review): the ';' after the empty body is a redundant empty declaration,
// and this single-argument constructor is not marked explicit -- confirm
// whether implicit conversion from Loop* is intended.
186 HexagonVectorLoopCarriedReuse(
Loop *L) : CurLoop(
L){};
192 std::set<Instruction *> ReplacedInsts;
194 ReuseValue ReuseCandidate;
197 void findLoopCarriedDeps();
198 void findValueToReuse();
210char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
213 "Hexagon-specific predictive commoning for HVX vectors",
225 HexagonVectorLoopCarriedReuse Vlcr(&L);
233bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(
Loop *L,
237 HexagonVectorLoopCarriedReuse Vlcr(L);
241bool HexagonVectorLoopCarriedReuse::run() {
242 if (!CurLoop->getLoopPreheader())
246 if (!CurLoop->getSubLoops().empty())
250 if (CurLoop->getNumBlocks() != 1)
256bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(
CallInst *
C) {
257 switch (
C->getCalledFunction()->getIntrinsicID()) {
258 case Intrinsic::hexagon_V6_vaddb:
259 case Intrinsic::hexagon_V6_vaddb_128B:
260 case Intrinsic::hexagon_V6_vaddh:
261 case Intrinsic::hexagon_V6_vaddh_128B:
262 case Intrinsic::hexagon_V6_vaddw:
263 case Intrinsic::hexagon_V6_vaddw_128B:
264 case Intrinsic::hexagon_V6_vaddubh:
265 case Intrinsic::hexagon_V6_vaddubh_128B:
266 case Intrinsic::hexagon_V6_vadduhw:
267 case Intrinsic::hexagon_V6_vadduhw_128B:
268 case Intrinsic::hexagon_V6_vaddhw:
269 case Intrinsic::hexagon_V6_vaddhw_128B:
270 case Intrinsic::hexagon_V6_vmaxb:
271 case Intrinsic::hexagon_V6_vmaxb_128B:
272 case Intrinsic::hexagon_V6_vmaxh:
273 case Intrinsic::hexagon_V6_vmaxh_128B:
274 case Intrinsic::hexagon_V6_vmaxw:
275 case Intrinsic::hexagon_V6_vmaxw_128B:
276 case Intrinsic::hexagon_V6_vmaxub:
277 case Intrinsic::hexagon_V6_vmaxub_128B:
278 case Intrinsic::hexagon_V6_vmaxuh:
279 case Intrinsic::hexagon_V6_vmaxuh_128B:
280 case Intrinsic::hexagon_V6_vminub:
281 case Intrinsic::hexagon_V6_vminub_128B:
282 case Intrinsic::hexagon_V6_vminuh:
283 case Intrinsic::hexagon_V6_vminuh_128B:
284 case Intrinsic::hexagon_V6_vminb:
285 case Intrinsic::hexagon_V6_vminb_128B:
286 case Intrinsic::hexagon_V6_vminh:
287 case Intrinsic::hexagon_V6_vminh_128B:
288 case Intrinsic::hexagon_V6_vminw:
289 case Intrinsic::hexagon_V6_vminw_128B:
290 case Intrinsic::hexagon_V6_vmpyub:
291 case Intrinsic::hexagon_V6_vmpyub_128B:
292 case Intrinsic::hexagon_V6_vmpyuh:
293 case Intrinsic::hexagon_V6_vmpyuh_128B:
294 case Intrinsic::hexagon_V6_vavgub:
295 case Intrinsic::hexagon_V6_vavgub_128B:
296 case Intrinsic::hexagon_V6_vavgh:
297 case Intrinsic::hexagon_V6_vavgh_128B:
298 case Intrinsic::hexagon_V6_vavguh:
299 case Intrinsic::hexagon_V6_vavguh_128B:
300 case Intrinsic::hexagon_V6_vavgw:
301 case Intrinsic::hexagon_V6_vavgw_128B:
302 case Intrinsic::hexagon_V6_vavgb:
303 case Intrinsic::hexagon_V6_vavgb_128B:
304 case Intrinsic::hexagon_V6_vavguw:
305 case Intrinsic::hexagon_V6_vavguw_128B:
306 case Intrinsic::hexagon_V6_vabsdiffh:
307 case Intrinsic::hexagon_V6_vabsdiffh_128B:
308 case Intrinsic::hexagon_V6_vabsdiffub:
309 case Intrinsic::hexagon_V6_vabsdiffub_128B:
310 case Intrinsic::hexagon_V6_vabsdiffuh:
311 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
312 case Intrinsic::hexagon_V6_vabsdiffw:
313 case Intrinsic::hexagon_V6_vabsdiffw_128B:
320bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(
Instruction *I1,
322 if (!
I1->isSameOperationAs(I2))
328 if (
CallInst *C1 = dyn_cast<CallInst>(I1)) {
329 if (
CallInst *C2 = dyn_cast<CallInst>(I2)) {
330 if (C1->getCalledFunction() != C2->getCalledFunction())
338 unsigned NumOperands =
I1->getNumOperands();
339 for (
unsigned i = 0; i < NumOperands; ++i) {
352bool HexagonVectorLoopCarriedReuse::canReplace(
Instruction *
I) {
357 switch (
II->getIntrinsicID()) {
358 case Intrinsic::hexagon_V6_hi:
359 case Intrinsic::hexagon_V6_lo:
360 case Intrinsic::hexagon_V6_hi_128B:
361 case Intrinsic::hexagon_V6_lo_128B:
368void HexagonVectorLoopCarriedReuse::findValueToReuse() {
369 for (
auto *
D : Dependences) {
370 LLVM_DEBUG(
dbgs() <<
"Processing dependence " << *(
D->front()) <<
"\n");
374 <<
".. Skipping because number of iterations > than the limit\n");
378 PHINode *PN = cast<PHINode>(
D->front());
380 int Iters =
D->iterations();
383 <<
" can be reused\n");
389 if (
User->getParent() != BB)
391 if (ReplacedInsts.count(
User)) {
393 <<
" has already been replaced. Skipping...\n");
396 if (isa<PHINode>(
User))
398 if (
User->mayHaveSideEffects())
400 if (!canReplace(
User))
414 for (
Use &U : BEInst->
uses()) {
419 if (!isEquivalentOperation(
I, BEUser))
422 int NumOperands =
I->getNumOperands();
433 std::map<Instruction *, DepChain *> DepChains;
435 if ((
I &&
I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
437 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
441 for (
int T = 0;
T < NumOperands; ++
T) {
443 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
444 if (!OpInst && !BEOpInst) {
451 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
454 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
458 DepChains[OpInst] =
D;
469 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
483 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
484 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
487 DepChains[OpInst] =
D;
496 ReuseCandidate.Inst2Replace =
I;
497 ReuseCandidate.BackedgeInst = BEUser;
498 ReuseCandidate.DepChains = DepChains;
499 ReuseCandidate.Iterations = Iters;
502 ReuseCandidate.reset();
506 ReuseCandidate.reset();
509Value *HexagonVectorLoopCarriedReuse::findValueInBlock(
Value *
Op,
517void HexagonVectorLoopCarriedReuse::reuseValue() {
519 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
522 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
523 int Iterations = ReuseCandidate.Iterations;
524 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
525 assert(!DepChains.empty() &&
"No DepChains");
526 LLVM_DEBUG(
dbgs() <<
"reuseValue is making the following changes\n");
529 for (
int i = 0; i < Iterations; ++i) {
532 for (
int j = 0;
j < NumOperands; ++
j) {
537 DepChain &
D = *DepChains[
I];
541 Value *ValInPreheader = findValueInBlock(
D[i], LoopPH);
542 InstInPreheader->
setOperand(j, ValInPreheader);
544 InstsInPreheader.
push_back(InstInPreheader);
545 InstInPreheader->
setName(Inst2Replace->
getName() +
".hexagon.vlcr");
553 Value *BEVal = BEInst;
555 for (
int i = Iterations-1; i >=0 ; --i) {
556 Instruction *InstInPreheader = InstsInPreheader[i];
557 NewPhi = IRB.CreatePHI(InstInPreheader->
getType(), 2);
567 ReplacedInsts.insert(Inst2Replace);
568 ++HexagonNumVectorLoopCarriedReuse;
571bool HexagonVectorLoopCarriedReuse::doVLCR() {
572 assert(CurLoop->getSubLoops().empty() &&
573 "Can do VLCR on the innermost loop only");
574 assert((CurLoop->getNumBlocks() == 1) &&
575 "Can do VLCR only on single block loops");
577 bool Changed =
false;
580 LLVM_DEBUG(
dbgs() <<
"Working on Loop: " << *CurLoop->getHeader() <<
"\n");
586 findLoopCarriedDeps();
588 if (ReuseCandidate.isDefined()) {
598void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(
Instruction *
I,
606 if (NumIncomingValues != 2) {
612 if (BB != CurLoop->getHeader()) {
618 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
621 assert(BEInst &&
"There should be a value over the backedge");
625 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
630 findDepChainFromPHI(BEInst,
D);
634DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(
Instruction *I1,
637 for (
auto *
D : Dependences) {
638 if (
D->front() == I1 &&
D->back() == I2 &&
D->iterations() == Iters)
644void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
646 for (
auto I = BB->
begin(), E = BB->
end();
I != E && isa<PHINode>(
I); ++
I) {
647 auto *PN = cast<PHINode>(
I);
648 if (!isa<VectorType>(PN->
getType()))
651 DepChain *
D =
new DepChain();
652 findDepChainFromPHI(PN, *
D);
654 Dependences.insert(
D);
658 LLVM_DEBUG(
dbgs() <<
"Found " << Dependences.size() <<
" dependences\n");
663 return new HexagonVectorLoopCarriedReuseLegacyPass();
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_UNUSED
std::optional< std::vector< StOtherPiece > > Other
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))
hexagon Hexagon specific predictive commoning for HVX vectors
This defines the Use class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::const_iterator getFirstNonPHIIt() const
Iterator-returning form of getFirstNonPHI.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
This class represents an Operation in the Expression.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
A vector that has set insertion semantics.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool isVectorTy() const
True if this is an instance of VectorType.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Pass * createHexagonVectorLoopCarriedReuseLegacyPass()
Hexagon Vector Loop Carried Reuse Pass.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...