LLVM  9.0.0svn
HexagonTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 /// This file implements a TargetTransformInfo analysis pass specific to the
9 /// Hexagon target machine. It uses the target's detailed information to provide
10 /// more precise answers to certain TTI queries, while letting the target
11 /// independent and default TTI implementations handle the rest.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "HexagonSubtarget.h"
19 #include "llvm/IR/InstrTypes.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/User.h"
22 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "hexagontti"
29 
30 static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
31  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
32 
33 static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
34  cl::init(true), cl::Hidden,
35  cl::desc("Control lookup table emission on Hexagon target"));
36 
// Fixed multiplier that makes floating point operations look more expensive
// to the vectorization cost model. It is a crude stand-in: ideally these
// costs would be expressed in cycles instead.
static constexpr unsigned FloatFactor = 4;
41 
42 bool HexagonTTIImpl::useHVX() const {
43  return ST.useHVXOps() && HexagonAutoHVX;
44 }
45 
46 bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
47  assert(VecTy->isVectorTy());
48  // Avoid types like <2 x i32*>.
49  if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
50  return false;
51  EVT VecVT = EVT::getEVT(VecTy);
52  if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
53  return false;
54  if (ST.isHVXVectorType(VecVT.getSimpleVT()))
55  return true;
56  auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
57  return Action == TargetLoweringBase::TypeWidenVector;
58 }
59 
60 unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
61  if (Ty->isVectorTy())
62  return Ty->getVectorNumElements();
63  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
64  "Expecting scalar type");
65  return 1;
66 }
67 
69 HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
70  // Return fast hardware support as every input < 64 bits will be promoted
71  // to 64 bits.
73 }
74 
75 // The Hexagon target can unroll loops with run-time trip counts.
78  UP.Runtime = UP.Partial = true;
79  // Only try to peel innermost loops with small runtime trip counts.
80  if (L && L->empty() && canPeel(L) &&
81  SE.getSmallConstantTripCount(L) == 0 &&
82  SE.getSmallConstantMaxTripCount(L) > 0 &&
83  SE.getSmallConstantMaxTripCount(L) <= 5) {
84  UP.PeelCount = 2;
85  }
86 }
87 
89  return true;
90 }
91 
92 /// --- Vector TTI begin ---
93 
94 unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
95  if (Vector)
96  return useHVX() ? 32 : 0;
97  return 32;
98 }
99 
101  return useHVX() ? 2 : 0;
102 }
103 
104 unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
105  return Vector ? getMinVectorRegisterBitWidth() : 32;
106 }
107 
109  return useHVX() ? ST.getVectorLength()*8 : 0;
110 }
111 
112 unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
113  return (8 * ST.getVectorLength()) / ElemWidth;
114 }
115 
117  bool Extract) {
118  return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
119 }
120 
122  ArrayRef<const Value*> Args, unsigned VF) {
124 }
125 
127  ArrayRef<Type*> Tys) {
128  return BaseT::getCallInstrCost(F, RetTy, Tys);
129 }
130 
132  ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
133  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
134 }
135 
138  unsigned ScalarizationCostPassed) {
139  if (ID == Intrinsic::bswap) {
140  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
141  return LT.first + 2;
142  }
143  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
144  ScalarizationCostPassed);
145 }
146 
148  ScalarEvolution *SE, const SCEV *S) {
149  return 0;
150 }
151 
152 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
153  unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
154  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
155  if (Opcode == Instruction::Store)
156  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
157 
158  if (Src->isVectorTy()) {
159  VectorType *VecTy = cast<VectorType>(Src);
160  unsigned VecWidth = VecTy->getBitWidth();
161  if (useHVX() && isTypeForHVX(VecTy)) {
162  unsigned RegWidth = getRegisterBitWidth(true);
163  assert(RegWidth && "Non-zero vector register width expected");
164  // Cost of HVX loads.
165  if (VecWidth % RegWidth == 0)
166  return VecWidth / RegWidth;
167  // Cost of constructing HVX vector from scalar loads.
168  Alignment = std::min(Alignment, RegWidth / 8);
169  unsigned AlignWidth = 8 * std::max(1u, Alignment);
170  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
171  return 3 * NumLoads;
172  }
173 
174  // Non-HVX vectors.
175  // Add extra cost for floating point types.
176  unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
177  : 1;
178  Alignment = std::min(Alignment, 8u);
179  unsigned AlignWidth = 8 * std::max(1u, Alignment);
180  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
181  if (Alignment == 4 || Alignment == 8)
182  return Cost * NumLoads;
183  // Loads of less than 32 bits will need extra inserts to compose a vector.
184  unsigned LogA = Log2_32(Alignment);
185  return (3 - LogA) * Cost * NumLoads;
186  }
187 
188  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
189 }
190 
191 unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
192  Type *Src, unsigned Alignment, unsigned AddressSpace) {
193  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
194 }
195 
197  int Index, Type *SubTp) {
198  return 1;
199 }
200 
201 unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
202  Value *Ptr, bool VariableMask, unsigned Alignment) {
203  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
204  Alignment);
205 }
206 
208  Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
209  unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
210  bool UseMaskForGaps) {
211  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
212  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
213  Alignment, AddressSpace,
214  UseMaskForCond, UseMaskForGaps);
215  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
216 }
217 
218 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
219  Type *CondTy, const Instruction *I) {
220  if (ValTy->isVectorTy()) {
221  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
222  if (Opcode == Instruction::FCmp)
223  return LT.first + FloatFactor * getTypeNumElements(ValTy);
224  }
225  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
226 }
227 
228 unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
229  TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
230  TTI::OperandValueProperties Opd1PropInfo,
232  if (Ty->isVectorTy()) {
233  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
234  if (LT.second.isFloatingPoint())
235  return LT.first + FloatFactor * getTypeNumElements(Ty);
236  }
237  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
238  Opd1PropInfo, Opd2PropInfo, Args);
239 }
240 
241 unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
242  Type *SrcTy, const Instruction *I) {
243  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
244  unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
245  unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
246 
247  std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
248  std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
249  return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
250  }
251  return 1;
252 }
253 
254 unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
255  unsigned Index) {
256  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
257  : Val;
258  if (Opcode == Instruction::InsertElement) {
259  // Need two rotations for non-zero index.
260  unsigned Cost = (Index != 0) ? 2 : 0;
261  if (ElemTy->isIntegerTy(32))
262  return Cost;
263  // If it's not a 32-bit value, there will need to be an extract.
264  return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
265  }
266 
267  if (Opcode == Instruction::ExtractElement)
268  return 2;
269 
270  return 1;
271 }
272 
273 /// --- Vector TTI end ---
274 
276  return ST.getL1PrefetchDistance();
277 }
278 
280  return ST.getL1CacheLineSize();
281 }
282 
284  ArrayRef<const Value *> Operands) {
285  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
286  if (!CI->isIntegerCast())
287  return false;
288  // Only extensions from an integer type shorter than 32-bit to i32
289  // can be folded into the load.
290  const DataLayout &DL = getDataLayout();
291  unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
292  unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
293  if (DBW != 32 || SBW >= DBW)
294  return false;
295 
296  const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
297  // Technically, this code could allow multiple uses of the load, and
298  // check if all the uses are the same extension operation, but this
299  // should be sufficient for most cases.
300  return LI && LI->hasOneUse();
301  };
302 
303  if (const CastInst *CI = dyn_cast<const CastInst>(U))
304  if (isCastFoldedIntoLoad(CI))
306  return BaseT::getUserCost(U, Operands);
307 }
308 
310  return EmitLookupTables;
311 }
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:581
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
int getUserCost(const User *U, ArrayRef< const Value *> Operands)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getMinimumVF(unsigned ElemWidth) const
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:520
The main scalar evolution driver.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
An instruction for reading from memory.
Definition: Instructions.h:167
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
unsigned getL1CacheLineSize() const
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value * > Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
unsigned getBitWidth() const
Return the minimum number of bits in the Vector type.
Definition: DerivedTypes.h:525
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, unsigned VF)
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
Definition: BasicTTIImpl.h:539
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:439
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:785
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
unsigned getL1PrefetchDistance() const
PopcntSupportKind
Flags indicating the kind of support for population count.
unsigned getMinVectorRegisterBitWidth() const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S)
This file implements a TargetTransformInfo analysis pass specific to the Hexagon target machine...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:863
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
Expected to fold away in lowering.
unsigned getUserCost(const User *U, ArrayRef< const Value * > Operands)
unsigned getRegisterBitWidth(bool Vector) const
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Extended Value Type.
Definition: ValueTypes.h:33
OperandValueProperties
Additional properties of an operand's values.
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
bool shouldFavorPostInc() const
Bias LSR towards creating post-increment opportunities.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
AddressSpace
Definition: NVPTXBaseInfo.h:21
static cl::opt< bool > HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX"))
bool canPeel(Loop *L)
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:535
Class to represent vector types.
Definition: DerivedTypes.h:427
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:832
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF)
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:601
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
unsigned getPrefetchDistance() const
— Vector TTI end —
unsigned getVectorLength() const
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
This class represents an analyzed expression in the program.
unsigned getNumberOfRegisters(bool vector) const
— Vector TTI begin —
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:510
Parameters that control the generic loop unrolling transformation.
static cl::opt< bool > EmitLookupTables("hexagon-emit-lookup-tables", cl::init(true), cl::Hidden, cl::desc("Control lookup table emission on Hexagon target"))
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned getMaxInterleaveFactor(unsigned VF)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:340
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
bool empty() const
Definition: LoopInfo.h:148
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:184
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Type * getElementType() const
Definition: DerivedTypes.h:394
const DataLayout & getDataLayout() const
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand's possible values.
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
This pass exposes codegen information to IR-level passes.
static const unsigned FloatFactor
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
ShuffleKind
The various kinds of shuffle patterns for vector queries.