30#include "llvm/IR/IntrinsicsHexagon.h"
49#define DEBUG_TYPE "hexagon-vlcr"
52 "Number of values that were reused from a previous iteration.");
56 cl::desc(
"Maximum distance of loop carried dependences that are handled"),
65 ChainOfDependences Chain;
68 bool isIdentical(DepChain &
Other)
const {
71 ChainOfDependences &OtherChain =
Other.getChain();
72 for (
int i = 0; i <
size(); ++i) {
73 if (Chain[i] != OtherChain[i])
79 ChainOfDependences &getChain() {
91 void push_back(Instruction *
I) {
95 int iterations()
const {
100 return Chain.front();
111 friend raw_ostream &
operator<< (raw_ostream &OS,
const DepChain &
D);
116 const ChainOfDependences &CD =
D.Chain;
117 int ChainSize = CD.size();
118 OS <<
"**DepChain Start::**\n";
119 for (
int i = 0; i < ChainSize -1; ++i) {
120 OS << *(CD[i]) <<
" -->\n";
122 OS << *CD[ChainSize-1] <<
"\n";
133 std::map<Instruction *, DepChain *> DepChains;
136 ReuseValue() =
default;
139 Inst2Replace =
nullptr;
140 BackedgeInst =
nullptr;
144 bool isDefined() {
return Inst2Replace !=
nullptr; }
149 OS <<
"** ReuseValue ***\n";
150 OS <<
"Instruction to Replace: " << *(RU.Inst2Replace) <<
"\n";
151 OS <<
"Backedge Instruction: " << *(RU.BackedgeInst) <<
"\n";
155 class HexagonVectorLoopCarriedReuseLegacyPass :
public LoopPass {
159 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {}
161 StringRef getPassName()
const override {
162 return "Hexagon-specific loop carried reuse for HVX vectors";
165 void getAnalysisUsage(AnalysisUsage &AU)
const override {
172 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
175 class HexagonVectorLoopCarriedReuse {
177 HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(
L){};
182 SetVector<DepChain *> Dependences;
183 std::set<Instruction *> ReplacedInsts;
185 ReuseValue ReuseCandidate;
188 void findLoopCarriedDeps();
189 void findValueToReuse();
190 void findDepChainFromPHI(Instruction *
I, DepChain &
D);
193 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2,
int Iters);
194 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
195 bool canReplace(Instruction *
I);
196 bool isCallInstCommutative(CallInst *
C);
201char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
204 "Hexagon-specific predictive commoning for HVX vectors",
209 "Hexagon-specific predictive commoning for HVX vectors",
216 HexagonVectorLoopCarriedReuse Vlcr(&L);
224bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(
Loop *L,
228 HexagonVectorLoopCarriedReuse Vlcr(L);
232bool HexagonVectorLoopCarriedReuse::run() {
247bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *
C) {
248 switch (
C->getCalledFunction()->getIntrinsicID()) {
249 case Intrinsic::hexagon_V6_vaddb:
250 case Intrinsic::hexagon_V6_vaddb_128B:
251 case Intrinsic::hexagon_V6_vaddh:
252 case Intrinsic::hexagon_V6_vaddh_128B:
253 case Intrinsic::hexagon_V6_vaddw:
254 case Intrinsic::hexagon_V6_vaddw_128B:
255 case Intrinsic::hexagon_V6_vaddubh:
256 case Intrinsic::hexagon_V6_vaddubh_128B:
257 case Intrinsic::hexagon_V6_vadduhw:
258 case Intrinsic::hexagon_V6_vadduhw_128B:
259 case Intrinsic::hexagon_V6_vaddhw:
260 case Intrinsic::hexagon_V6_vaddhw_128B:
261 case Intrinsic::hexagon_V6_vmaxb:
262 case Intrinsic::hexagon_V6_vmaxb_128B:
263 case Intrinsic::hexagon_V6_vmaxh:
264 case Intrinsic::hexagon_V6_vmaxh_128B:
265 case Intrinsic::hexagon_V6_vmaxw:
266 case Intrinsic::hexagon_V6_vmaxw_128B:
267 case Intrinsic::hexagon_V6_vmaxub:
268 case Intrinsic::hexagon_V6_vmaxub_128B:
269 case Intrinsic::hexagon_V6_vmaxuh:
270 case Intrinsic::hexagon_V6_vmaxuh_128B:
271 case Intrinsic::hexagon_V6_vminub:
272 case Intrinsic::hexagon_V6_vminub_128B:
273 case Intrinsic::hexagon_V6_vminuh:
274 case Intrinsic::hexagon_V6_vminuh_128B:
275 case Intrinsic::hexagon_V6_vminb:
276 case Intrinsic::hexagon_V6_vminb_128B:
277 case Intrinsic::hexagon_V6_vminh:
278 case Intrinsic::hexagon_V6_vminh_128B:
279 case Intrinsic::hexagon_V6_vminw:
280 case Intrinsic::hexagon_V6_vminw_128B:
281 case Intrinsic::hexagon_V6_vmpyub:
282 case Intrinsic::hexagon_V6_vmpyub_128B:
283 case Intrinsic::hexagon_V6_vmpyuh:
284 case Intrinsic::hexagon_V6_vmpyuh_128B:
285 case Intrinsic::hexagon_V6_vavgub:
286 case Intrinsic::hexagon_V6_vavgub_128B:
287 case Intrinsic::hexagon_V6_vavgh:
288 case Intrinsic::hexagon_V6_vavgh_128B:
289 case Intrinsic::hexagon_V6_vavguh:
290 case Intrinsic::hexagon_V6_vavguh_128B:
291 case Intrinsic::hexagon_V6_vavgw:
292 case Intrinsic::hexagon_V6_vavgw_128B:
293 case Intrinsic::hexagon_V6_vavgb:
294 case Intrinsic::hexagon_V6_vavgb_128B:
295 case Intrinsic::hexagon_V6_vavguw:
296 case Intrinsic::hexagon_V6_vavguw_128B:
297 case Intrinsic::hexagon_V6_vabsdiffh:
298 case Intrinsic::hexagon_V6_vabsdiffh_128B:
299 case Intrinsic::hexagon_V6_vabsdiffub:
300 case Intrinsic::hexagon_V6_vabsdiffub_128B:
301 case Intrinsic::hexagon_V6_vabsdiffuh:
302 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
303 case Intrinsic::hexagon_V6_vabsdiffw:
304 case Intrinsic::hexagon_V6_vabsdiffw_128B:
311bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
313 if (!
I1->isSameOperationAs(I2))
321 if (C1->getCalledFunction() != C2->getCalledFunction())
329 unsigned NumOperands =
I1->getNumOperands();
330 for (
unsigned i = 0; i < NumOperands; ++i) {
343bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *
I) {
348 switch (
II->getIntrinsicID()) {
349 case Intrinsic::hexagon_V6_hi:
350 case Intrinsic::hexagon_V6_lo:
351 case Intrinsic::hexagon_V6_hi_128B:
352 case Intrinsic::hexagon_V6_lo_128B:
359void HexagonVectorLoopCarriedReuse::findValueToReuse() {
360 for (
auto *
D : Dependences) {
361 LLVM_DEBUG(
dbgs() <<
"Processing dependence " << *(
D->front()) <<
"\n");
365 <<
".. Skipping because number of iterations > than the limit\n");
371 int Iters =
D->iterations();
374 <<
" can be reused\n");
376 SmallVector<Instruction *, 4> PNUsers;
377 for (Use &U : PN->
uses()) {
380 if (
User->getParent() != BB)
382 if (ReplacedInsts.count(User)) {
384 <<
" has already been replaced. Skipping...\n");
389 if (
User->mayHaveSideEffects())
391 if (!canReplace(User))
404 for (Instruction *
I : PNUsers) {
405 for (Use &U : BEInst->
uses()) {
410 if (!isEquivalentOperation(
I, BEUser))
413 int NumOperands =
I->getNumOperands();
424 std::map<Instruction *, DepChain *> DepChains;
426 if ((
I &&
I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
428 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
432 for (
int T = 0;
T < NumOperands; ++
T) {
435 if (!OpInst && !BEOpInst) {
442 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
445 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
449 DepChains[OpInst] =
D;
460 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
475 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
478 DepChains[OpInst] =
D;
487 ReuseCandidate.Inst2Replace =
I;
488 ReuseCandidate.BackedgeInst = BEUser;
489 ReuseCandidate.DepChains = DepChains;
490 ReuseCandidate.Iterations = Iters;
493 ReuseCandidate.reset();
497 ReuseCandidate.reset();
500Value *HexagonVectorLoopCarriedReuse::findValueInBlock(
Value *
Op,
508void HexagonVectorLoopCarriedReuse::reuseValue() {
510 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
513 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
514 int Iterations = ReuseCandidate.Iterations;
516 assert(!DepChains.empty() &&
"No DepChains");
517 LLVM_DEBUG(
dbgs() <<
"reuseValue is making the following changes\n");
519 SmallVector<Instruction *, 4> InstsInPreheader;
520 for (
int i = 0; i < Iterations; ++i) {
522 for (
int j = 0;
j < NumOperands; ++
j) {
527 DepChain &
D = *DepChains[
I];
531 Value *ValInPreheader = findValueInBlock(
D[i], LoopPH);
532 InstInPreheader->
setOperand(j, ValInPreheader);
534 InstsInPreheader.
push_back(InstInPreheader);
535 InstInPreheader->
setName(Inst2Replace->
getName() +
".hexagon.vlcr");
543 Value *BEVal = BEInst;
545 for (
int i = Iterations-1; i >=0 ; --i) {
546 Instruction *InstInPreheader = InstsInPreheader[i];
547 NewPhi = IRB.CreatePHI(InstInPreheader->
getType(), 2);
557 ReplacedInsts.insert(Inst2Replace);
558 ++HexagonNumVectorLoopCarriedReuse;
561bool HexagonVectorLoopCarriedReuse::doVLCR() {
563 "Can do VLCR on the innermost loop only");
565 "Can do VLCR only on single block loops");
576 findLoopCarriedDeps();
578 if (ReuseCandidate.isDefined()) {
588void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *
I,
596 if (NumIncomingValues != 2) {
611 assert(BEInst &&
"There should be a value over the backedge");
620 findDepChainFromPHI(BEInst,
D);
624DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
627 for (
auto *
D : Dependences) {
628 if (
D->front() == I1 &&
D->back() == I2 &&
D->iterations() == Iters)
634void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
641 DepChain *
D =
new DepChain();
642 findDepChainFromPHI(PN, *
D);
644 Dependences.insert(
D);
648 LLVM_DEBUG(
dbgs() <<
"Found " << Dependences.size() <<
" dependences\n");
653 return new HexagonVectorLoopCarriedReuseLegacyPass();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))
This defines the Use class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
iterator begin()
Instruction iterator methods.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isVectorTy() const
True if this is an instance of VectorType.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI Instruction & back() const
LLVM_ABI Instruction & front() const
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI char & LoopSimplifyID
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Pass * createHexagonVectorLoopCarriedReuseLegacyPass()
PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Run pass over the Loop.
HexagonVectorLoopCarriedReusePass()=default
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...