LLVM  10.0.0svn
HexagonTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 /// This file implements a TargetTransformInfo analysis pass specific to the
9 /// Hexagon target machine. It uses the target's detailed information to provide
10 /// more precise answers to certain TTI queries, while letting the target
11 /// independent and default TTI implementations handle the rest.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "HexagonSubtarget.h"
19 #include "llvm/IR/InstrTypes.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/User.h"
22 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "hexagontti"
29 
// Hidden command-line flag, off by default. useHVX() only reports HVX as
// usable when this flag is set in addition to the subtarget enabling HVX ops.
30 static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
31  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
32 
// Hidden command-line flag, on by default. Its value is returned unchanged by
// one of the TTI hooks defined later in this file.
33 static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
34  cl::init(true), cl::Hidden,
35  cl::desc("Control lookup table emission on Hexagon target"));
36 
37 // Constant "cost factor" to make floating point operations more expensive
38 // in terms of vectorization cost. This isn't the best way, but it should
39 // do. Ultimately, the cost should use cycles.
40 static const unsigned FloatFactor = 4;
41 
42 bool HexagonTTIImpl::useHVX() const {
43  return ST.useHVXOps() && HexagonAutoHVX;
44 }
45 
46 bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
47  assert(VecTy->isVectorTy());
48  if (cast<VectorType>(VecTy)->isScalable())
49  return false;
50  // Avoid types like <2 x i32*>.
51  if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
52  return false;
53  EVT VecVT = EVT::getEVT(VecTy);
54  if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
55  return false;
56  if (ST.isHVXVectorType(VecVT.getSimpleVT()))
57  return true;
58  auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
59  return Action == TargetLoweringBase::TypeWidenVector;
60 }
61 
62 unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
63  if (Ty->isVectorTy())
64  return Ty->getVectorNumElements();
65  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
66  "Expecting scalar type");
67  return 1;
68 }
69 
71 HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
72  // Return fast hardware support as every input < 64 bits will be promoted
73  // to 64 bits.
75 }
76 
77 // The Hexagon target can unroll loops with run-time trip counts.
80  UP.Runtime = UP.Partial = true;
81  // Only try to peel innermost loops with small runtime trip counts.
82  if (L && L->empty() && canPeel(L) &&
83  SE.getSmallConstantTripCount(L) == 0 &&
84  SE.getSmallConstantMaxTripCount(L) > 0 &&
85  SE.getSmallConstantMaxTripCount(L) <= 5) {
86  UP.PeelCount = 2;
87  }
88 }
89 
91  return true;
92 }
93 
94 /// --- Vector TTI begin ---
95 
97  if (Vector)
98  return useHVX() ? 32 : 0;
99  return 32;
100 }
101 
103  return useHVX() ? 2 : 0;
104 }
105 
107  return Vector ? getMinVectorRegisterBitWidth() : 32;
108 }
109 
111  return useHVX() ? ST.getVectorLength()*8 : 0;
112 }
113 
114 unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
115  return (8 * ST.getVectorLength()) / ElemWidth;
116 }
117 
119  bool Extract) {
120  return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
121 }
122 
124  ArrayRef<const Value*> Args, unsigned VF) {
126 }
127 
129  ArrayRef<Type*> Tys) {
130  return BaseT::getCallInstrCost(F, RetTy, Tys);
131 }
132 
134  ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
135  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
136 }
137 
140  unsigned ScalarizationCostPassed) {
141  if (ID == Intrinsic::bswap) {
142  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
143  return LT.first + 2;
144  }
145  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
146  ScalarizationCostPassed);
147 }
148 
150  ScalarEvolution *SE, const SCEV *S) {
151  return 0;
152 }
153 
154 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
155  unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
156  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
157  if (Opcode == Instruction::Store)
158  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
159 
160  if (Src->isVectorTy()) {
161  VectorType *VecTy = cast<VectorType>(Src);
162  unsigned VecWidth = VecTy->getBitWidth();
163  if (useHVX() && isTypeForHVX(VecTy)) {
164  unsigned RegWidth = getRegisterBitWidth(true);
165  assert(RegWidth && "Non-zero vector register width expected");
166  // Cost of HVX loads.
167  if (VecWidth % RegWidth == 0)
168  return VecWidth / RegWidth;
169  // Cost of constructing HVX vector from scalar loads.
170  Alignment = std::min(Alignment, RegWidth / 8);
171  unsigned AlignWidth = 8 * std::max(1u, Alignment);
172  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
173  return 3 * NumLoads;
174  }
175 
176  // Non-HVX vectors.
177  // Add extra cost for floating point types.
178  unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
179  : 1;
180  Alignment = std::min(Alignment, 8u);
181  unsigned AlignWidth = 8 * std::max(1u, Alignment);
182  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
183  if (Alignment == 4 || Alignment == 8)
184  return Cost * NumLoads;
185  // Loads of less than 32 bits will need extra inserts to compose a vector.
186  unsigned LogA = Log2_32(Alignment);
187  return (3 - LogA) * Cost * NumLoads;
188  }
189 
190  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
191 }
192 
193 unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
194  Type *Src, unsigned Alignment, unsigned AddressSpace) {
195  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
196 }
197 
199  int Index, Type *SubTp) {
200  return 1;
201 }
202 
203 unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
204  Value *Ptr, bool VariableMask, unsigned Alignment) {
205  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
206  Alignment);
207 }
208 
210  Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
211  unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
212  bool UseMaskForGaps) {
213  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
214  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
215  Alignment, AddressSpace,
216  UseMaskForCond, UseMaskForGaps);
217  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
218 }
219 
220 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
221  Type *CondTy, const Instruction *I) {
222  if (ValTy->isVectorTy()) {
223  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
224  if (Opcode == Instruction::FCmp)
225  return LT.first + FloatFactor * getTypeNumElements(ValTy);
226  }
227  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
228 }
229 
230 unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
231  TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
232  TTI::OperandValueProperties Opd1PropInfo,
234  if (Ty->isVectorTy()) {
235  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
236  if (LT.second.isFloatingPoint())
237  return LT.first + FloatFactor * getTypeNumElements(Ty);
238  }
239  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
240  Opd1PropInfo, Opd2PropInfo, Args);
241 }
242 
243 unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
244  Type *SrcTy, const Instruction *I) {
245  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
246  unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
247  unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
248 
249  std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
250  std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
251  return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
252  }
253  return 1;
254 }
255 
256 unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
257  unsigned Index) {
258  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
259  : Val;
260  if (Opcode == Instruction::InsertElement) {
261  // Need two rotations for non-zero index.
262  unsigned Cost = (Index != 0) ? 2 : 0;
263  if (ElemTy->isIntegerTy(32))
264  return Cost;
265  // If it's not a 32-bit value, there will need to be an extract.
266  return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
267  }
268 
269  if (Opcode == Instruction::ExtractElement)
270  return 2;
271 
272  return 1;
273 }
274 
275 /// --- Vector TTI end ---
276 
278  return ST.getL1PrefetchDistance();
279 }
280 
282  return ST.getL1CacheLineSize();
283 }
284 
287  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
288  if (!CI->isIntegerCast())
289  return false;
290  // Only extensions from an integer type shorter than 32-bit to i32
291  // can be folded into the load.
292  const DataLayout &DL = getDataLayout();
293  unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
294  unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
295  if (DBW != 32 || SBW >= DBW)
296  return false;
297 
298  const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
299  // Technically, this code could allow multiple uses of the load, and
300  // check if all the uses are the same extension operation, but this
301  // should be sufficient for most cases.
302  return LI && LI->hasOneUse();
303  };
304 
305  if (const CastInst *CI = dyn_cast<const CastInst>(U))
306  if (isCastFoldedIntoLoad(CI))
308  return BaseT::getUserCost(U, Operands);
309 }
310 
312  return EmitLookupTables;
313 }
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:589
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
int getUserCost(const User *U, ArrayRef< const Value *> Operands)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getMinimumVF(unsigned ElemWidth) const
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:528
The main scalar evolution driver.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
unsigned getL1CacheLineSize() const
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value * > Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
unsigned getBitWidth() const
Return the minimum number of bits in the Vector type.
Definition: DerivedTypes.h:551
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:547
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:439
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
mir Rename Register Operands
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:793
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
unsigned getL1PrefetchDistance() const
PopcntSupportKind
Flags indicating the kind of support for population count.
unsigned getMinVectorRegisterBitWidth() const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S)
This file implements a TargetTransformInfo analysis pass specific to the Hexagon target machine...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:871
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
Expected to fold away in lowering.
unsigned getUserCost(const User *U, ArrayRef< const Value * > Operands)
unsigned getRegisterBitWidth(bool Vector) const
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Extended Value Type.
Definition: ValueTypes.h:33
OperandValueProperties
Additional properties of an operand's values.
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
bool shouldFavorPostInc() const
Bias LSR towards creating post-increment opportunities.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
AddressSpace
Definition: NVPTXBaseInfo.h:21
static cl::opt< bool > HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX"))
bool canPeel(Loop *L)
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:561
Class to represent vector types.
Definition: DerivedTypes.h:427
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:840
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF)
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:598
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
unsigned getPrefetchDistance() const
— Vector TTI end —
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:133
unsigned getVectorLength() const
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
This class represents an analyzed expression in the program.
unsigned getNumberOfRegisters(bool vector) const
— Vector TTI begin —
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
Parameters that control the generic loop unrolling transformation.
static cl::opt< bool > EmitLookupTables("hexagon-emit-lookup-tables", cl::init(true), cl::Hidden, cl::desc("Control lookup table emission on Hexagon target"))
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned getMaxInterleaveFactor(unsigned VF)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:477
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
bool empty() const
Definition: LoopInfo.h:151
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:184
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Type * getElementType() const
Definition: DerivedTypes.h:394
const DataLayout & getDataLayout() const
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:432
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand's possible values.
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
This pass exposes codegen information to IR-level passes.
static const unsigned FloatFactor
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
ShuffleKind
The various kinds of shuffle patterns for vector queries.