LLVM  9.0.0svn
HexagonTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 /// This file implements a TargetTransformInfo analysis pass specific to the
9 /// Hexagon target machine. It uses the target's detailed information to provide
10 /// more precise answers to certain TTI queries, while letting the target
11 /// independent and default TTI implementations handle the rest.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "HexagonSubtarget.h"
19 #include "llvm/IR/InstrTypes.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/User.h"
22 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "hexagontti"
29 
30 static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
31  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
32 
33 static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
34  cl::init(true), cl::Hidden,
35  cl::desc("Control lookup table emission on Hexagon target"));
36 
// Fixed multiplier applied to floating-point operations so that they look
// more expensive to the vectorization cost model. This is a coarse
// approximation; ideally the cost would be expressed in cycles.
static const unsigned FloatFactor = 4u;
41 
42 bool HexagonTTIImpl::useHVX() const {
43  return ST.useHVXOps() && HexagonAutoHVX;
44 }
45 
46 bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
47  assert(VecTy->isVectorTy());
48  // Avoid types like <2 x i32*>.
49  if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
50  return false;
51  EVT VecVT = EVT::getEVT(VecTy);
52  if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
53  return false;
54  if (ST.isHVXVectorType(VecVT.getSimpleVT()))
55  return true;
56  auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
57  return Action == TargetLoweringBase::TypeWidenVector;
58 }
59 
60 unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
61  if (Ty->isVectorTy())
62  return Ty->getVectorNumElements();
63  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
64  "Expecting scalar type");
65  return 1;
66 }
67 
69 HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
70  // Return fast hardware support as every input < 64 bits will be promoted
71  // to 64 bits.
73 }
74 
75 // The Hexagon target can unroll loops with run-time trip counts.
78  UP.Runtime = UP.Partial = true;
79  // Only try to peel innermost loops with small runtime trip counts.
80  if (L && L->empty() && canPeel(L) &&
81  SE.getSmallConstantTripCount(L) == 0 &&
82  SE.getSmallConstantMaxTripCount(L) > 0 &&
83  SE.getSmallConstantMaxTripCount(L) <= 5) {
84  UP.PeelCount = 2;
85  }
86 }
87 
89  return true;
90 }
91 
92 /// --- Vector TTI begin ---
93 
94 unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
95  if (Vector)
96  return useHVX() ? 32 : 0;
97  return 32;
98 }
99 
101  return useHVX() ? 2 : 0;
102 }
103 
104 unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
105  return Vector ? getMinVectorRegisterBitWidth() : 32;
106 }
107 
109  return useHVX() ? ST.getVectorLength()*8 : 0;
110 }
111 
112 unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
113  return (8 * ST.getVectorLength()) / ElemWidth;
114 }
115 
117  bool Extract) {
118  return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
119 }
120 
122  ArrayRef<const Value*> Args, unsigned VF) {
124 }
125 
127  ArrayRef<Type*> Tys) {
128  return BaseT::getCallInstrCost(F, RetTy, Tys);
129 }
130 
132  ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
133  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
134 }
135 
138  unsigned ScalarizationCostPassed) {
139  if (ID == Intrinsic::bswap) {
140  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
141  return LT.first + 2;
142  }
143  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
144  ScalarizationCostPassed);
145 }
146 
148  ScalarEvolution *SE, const SCEV *S) {
149  return 0;
150 }
151 
152 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
153  unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
154  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
155  if (Opcode == Instruction::Store)
156  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
157 
158  if (Src->isVectorTy()) {
159  VectorType *VecTy = cast<VectorType>(Src);
160  unsigned VecWidth = VecTy->getBitWidth();
161  if (useHVX() && isTypeForHVX(VecTy)) {
162  unsigned RegWidth = getRegisterBitWidth(true);
163  Alignment = std::min(Alignment, RegWidth/8);
164  // Cost of HVX loads.
165  if (VecWidth % RegWidth == 0)
166  return VecWidth / RegWidth;
167  // Cost of constructing HVX vector from scalar loads.
168  unsigned AlignWidth = 8 * std::max(1u, Alignment);
169  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
170  return 3*NumLoads;
171  }
172 
173  // Non-HVX vectors.
174  // Add extra cost for floating point types.
175  unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
176  : 1;
177  Alignment = std::min(Alignment, 8u);
178  unsigned AlignWidth = 8 * std::max(1u, Alignment);
179  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
180  if (Alignment == 4 || Alignment == 8)
181  return Cost * NumLoads;
182  // Loads of less than 32 bits will need extra inserts to compose a vector.
183  unsigned LogA = Log2_32(Alignment);
184  return (3 - LogA) * Cost * NumLoads;
185  }
186 
187  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
188 }
189 
190 unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
191  Type *Src, unsigned Alignment, unsigned AddressSpace) {
192  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
193 }
194 
196  int Index, Type *SubTp) {
197  return 1;
198 }
199 
200 unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
201  Value *Ptr, bool VariableMask, unsigned Alignment) {
202  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
203  Alignment);
204 }
205 
207  Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
208  unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
209  bool UseMaskForGaps) {
210  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
211  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
212  Alignment, AddressSpace,
213  UseMaskForCond, UseMaskForGaps);
214  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
215 }
216 
217 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
218  Type *CondTy, const Instruction *I) {
219  if (ValTy->isVectorTy()) {
220  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
221  if (Opcode == Instruction::FCmp)
222  return LT.first + FloatFactor * getTypeNumElements(ValTy);
223  }
224  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
225 }
226 
227 unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
228  TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
229  TTI::OperandValueProperties Opd1PropInfo,
231  if (Ty->isVectorTy()) {
232  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
233  if (LT.second.isFloatingPoint())
234  return LT.first + FloatFactor * getTypeNumElements(Ty);
235  }
236  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
237  Opd1PropInfo, Opd2PropInfo, Args);
238 }
239 
240 unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
241  Type *SrcTy, const Instruction *I) {
242  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
243  unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
244  unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
245 
246  std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
247  std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
248  return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
249  }
250  return 1;
251 }
252 
253 unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
254  unsigned Index) {
255  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
256  : Val;
257  if (Opcode == Instruction::InsertElement) {
258  // Need two rotations for non-zero index.
259  unsigned Cost = (Index != 0) ? 2 : 0;
260  if (ElemTy->isIntegerTy(32))
261  return Cost;
262  // If it's not a 32-bit value, there will need to be an extract.
263  return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
264  }
265 
266  if (Opcode == Instruction::ExtractElement)
267  return 2;
268 
269  return 1;
270 }
271 
272 /// --- Vector TTI end ---
273 
275  return ST.getL1PrefetchDistance();
276 }
277 
279  return ST.getL1CacheLineSize();
280 }
281 
283  ArrayRef<const Value *> Operands) {
284  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
285  if (!CI->isIntegerCast())
286  return false;
287  // Only extensions from an integer type shorter than 32-bit to i32
288  // can be folded into the load.
289  const DataLayout &DL = getDataLayout();
290  unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
291  unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
292  if (DBW != 32 || SBW >= DBW)
293  return false;
294 
295  const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
296  // Technically, this code could allow multiple uses of the load, and
297  // check if all the uses are the same extension operation, but this
298  // should be sufficient for most cases.
299  return LI && LI->hasOneUse();
300  };
301 
302  if (const CastInst *CI = dyn_cast<const CastInst>(U))
303  if (isCastFoldedIntoLoad(CI))
305  return BaseT::getUserCost(U, Operands);
306 }
307 
309  return EmitLookupTables;
310 }
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:567
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
int getUserCost(const User *U, ArrayRef< const Value *> Operands)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getMinimumVF(unsigned ElemWidth) const
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:506
The main scalar evolution driver.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
An instruction for reading from memory.
Definition: Instructions.h:167
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
unsigned getL1CacheLineSize() const
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value * > Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
unsigned getBitWidth() const
Return the number of bits in the Vector type.
Definition: DerivedTypes.h:483
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:525
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:771
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
unsigned getL1PrefetchDistance() const
PopcntSupportKind
Flags indicating the kind of support for population count.
unsigned getMinVectorRegisterBitWidth() const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S)
This file implements a TargetTransformInfo analysis pass specific to the Hexagon target machine...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:849
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
Expected to fold away in lowering.
unsigned getUserCost(const User *U, ArrayRef< const Value * > Operands)
unsigned getRegisterBitWidth(bool Vector) const
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Extended Value Type.
Definition: ValueTypes.h:33
OperandValueProperties
Additional properties of an operand's values.
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
bool shouldFavorPostInc() const
Bias LSR towards creating post-increment opportunities.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
AddressSpace
Definition: NVPTXBaseInfo.h:21
static cl::opt< bool > HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX"))
bool canPeel(Loop *L)
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:493
Class to represent vector types.
Definition: DerivedTypes.h:424
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:818
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF)
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:567
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
unsigned getPrefetchDistance() const
— Vector TTI end —
unsigned getVectorLength() const
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
This class represents an analyzed expression in the program.
unsigned getNumberOfRegisters(bool vector) const
— Vector TTI begin —
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
Parameters that control the generic loop unrolling transformation.
static cl::opt< bool > EmitLookupTables("hexagon-emit-lookup-tables", cl::init(true), cl::Hidden, cl::desc("Control lookup table emission on Hexagon target"))
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned getMaxInterleaveFactor(unsigned VF)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:308
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
bool empty() const
Definition: LoopInfo.h:145
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:184
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Type * getElementType() const
Definition: DerivedTypes.h:391
const DataLayout & getDataLayout() const
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand's possible values.
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
This pass exposes codegen information to IR-level passes.
static const unsigned FloatFactor
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
ShuffleKind
The various kinds of shuffle patterns for vector queries.