File: | lib/Target/Hexagon/HexagonTargetTransformInfo.cpp |
Warning: | line 165, column 20 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | /// \file | |||
8 | /// This file implements a TargetTransformInfo analysis pass specific to the | |||
9 | /// Hexagon target machine. It uses the target's detailed information to provide | |||
10 | /// more precise answers to certain TTI queries, while letting the target | |||
11 | /// independent and default TTI implementations handle the rest. | |||
12 | /// | |||
13 | //===----------------------------------------------------------------------===// | |||
14 | ||||
15 | #include "HexagonTargetTransformInfo.h" | |||
16 | #include "HexagonSubtarget.h" | |||
17 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
18 | #include "llvm/CodeGen/ValueTypes.h" | |||
19 | #include "llvm/IR/InstrTypes.h" | |||
20 | #include "llvm/IR/Instructions.h" | |||
21 | #include "llvm/IR/User.h" | |||
22 | #include "llvm/Support/Casting.h" | |||
23 | #include "llvm/Support/CommandLine.h" | |||
24 | #include "llvm/Transforms/Utils/UnrollLoop.h" | |||
25 | ||||
26 | using namespace llvm; | |||
27 | ||||
// Debug category name used by LLVM's debug-output macros for this file.
#define DEBUG_TYPE "hexagontti"
29 | ||||
30 | static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), | |||
31 | cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); | |||
32 | ||||
33 | static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables", | |||
34 | cl::init(true), cl::Hidden, | |||
35 | cl::desc("Control lookup table emission on Hexagon target")); | |||
36 | ||||
// Fixed multiplier applied to floating-point operations so that they look
// more expensive to the vectorization cost model. This is a crude stand-in;
// ideally the cost would be expressed in cycles.
static constexpr unsigned FloatFactor = 4;
41 | ||||
42 | bool HexagonTTIImpl::useHVX() const { | |||
43 | return ST.useHVXOps() && HexagonAutoHVX; | |||
44 | } | |||
45 | ||||
46 | bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const { | |||
47 | assert(VecTy->isVectorTy())((VecTy->isVectorTy()) ? static_cast<void> (0) : __assert_fail ("VecTy->isVectorTy()", "/build/llvm-toolchain-snapshot-9~svn361301/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp" , 47, __PRETTY_FUNCTION__)); | |||
48 | // Avoid types like <2 x i32*>. | |||
49 | if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy()) | |||
50 | return false; | |||
51 | EVT VecVT = EVT::getEVT(VecTy); | |||
52 | if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64) | |||
53 | return false; | |||
54 | if (ST.isHVXVectorType(VecVT.getSimpleVT())) | |||
55 | return true; | |||
56 | auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT()); | |||
57 | return Action == TargetLoweringBase::TypeWidenVector; | |||
58 | } | |||
59 | ||||
60 | unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const { | |||
61 | if (Ty->isVectorTy()) | |||
62 | return Ty->getVectorNumElements(); | |||
63 | assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&(((Ty->isIntegerTy() || Ty->isFloatingPointTy()) && "Expecting scalar type") ? static_cast<void> (0) : __assert_fail ("(Ty->isIntegerTy() || Ty->isFloatingPointTy()) && \"Expecting scalar type\"" , "/build/llvm-toolchain-snapshot-9~svn361301/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp" , 64, __PRETTY_FUNCTION__)) | |||
64 | "Expecting scalar type")(((Ty->isIntegerTy() || Ty->isFloatingPointTy()) && "Expecting scalar type") ? static_cast<void> (0) : __assert_fail ("(Ty->isIntegerTy() || Ty->isFloatingPointTy()) && \"Expecting scalar type\"" , "/build/llvm-toolchain-snapshot-9~svn361301/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp" , 64, __PRETTY_FUNCTION__)); | |||
65 | return 1; | |||
66 | } | |||
67 | ||||
68 | TargetTransformInfo::PopcntSupportKind | |||
69 | HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { | |||
70 | // Return fast hardware support as every input < 64 bits will be promoted | |||
71 | // to 64 bits. | |||
72 | return TargetTransformInfo::PSK_FastHardware; | |||
73 | } | |||
74 | ||||
75 | // The Hexagon target can unroll loops with run-time trip counts. | |||
76 | void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | |||
77 | TTI::UnrollingPreferences &UP) { | |||
78 | UP.Runtime = UP.Partial = true; | |||
79 | // Only try to peel innermost loops with small runtime trip counts. | |||
80 | if (L && L->empty() && canPeel(L) && | |||
81 | SE.getSmallConstantTripCount(L) == 0 && | |||
82 | SE.getSmallConstantMaxTripCount(L) > 0 && | |||
83 | SE.getSmallConstantMaxTripCount(L) <= 5) { | |||
84 | UP.PeelCount = 2; | |||
85 | } | |||
86 | } | |||
87 | ||||
88 | bool HexagonTTIImpl::shouldFavorPostInc() const { | |||
89 | return true; | |||
90 | } | |||
91 | ||||
92 | /// --- Vector TTI begin --- | |||
93 | ||||
94 | unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const { | |||
95 | if (Vector) | |||
96 | return useHVX() ? 32 : 0; | |||
97 | return 32; | |||
98 | } | |||
99 | ||||
100 | unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) { | |||
101 | return useHVX() ? 2 : 0; | |||
102 | } | |||
103 | ||||
104 | unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const { | |||
105 | return Vector ? getMinVectorRegisterBitWidth() : 32; | |||
106 | } | |||
107 | ||||
108 | unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const { | |||
109 | return useHVX() ? ST.getVectorLength()*8 : 0; | |||
110 | } | |||
111 | ||||
112 | unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const { | |||
113 | return (8 * ST.getVectorLength()) / ElemWidth; | |||
114 | } | |||
115 | ||||
116 | unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, | |||
117 | bool Extract) { | |||
118 | return BaseT::getScalarizationOverhead(Ty, Insert, Extract); | |||
119 | } | |||
120 | ||||
121 | unsigned HexagonTTIImpl::getOperandsScalarizationOverhead( | |||
122 | ArrayRef<const Value*> Args, unsigned VF) { | |||
123 | return BaseT::getOperandsScalarizationOverhead(Args, VF); | |||
124 | } | |||
125 | ||||
126 | unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy, | |||
127 | ArrayRef<Type*> Tys) { | |||
128 | return BaseT::getCallInstrCost(F, RetTy, Tys); | |||
129 | } | |||
130 | ||||
131 | unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | |||
132 | ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) { | |||
133 | return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); | |||
134 | } | |||
135 | ||||
136 | unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | |||
137 | ArrayRef<Type*> Tys, FastMathFlags FMF, | |||
138 | unsigned ScalarizationCostPassed) { | |||
139 | if (ID == Intrinsic::bswap) { | |||
140 | std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy); | |||
141 | return LT.first + 2; | |||
142 | } | |||
143 | return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, | |||
144 | ScalarizationCostPassed); | |||
145 | } | |||
146 | ||||
147 | unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp, | |||
148 | ScalarEvolution *SE, const SCEV *S) { | |||
149 | return 0; | |||
150 | } | |||
151 | ||||
152 | unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, | |||
153 | unsigned Alignment, unsigned AddressSpace, const Instruction *I) { | |||
154 | assert(Opcode == Instruction::Load || Opcode == Instruction::Store)((Opcode == Instruction::Load || Opcode == Instruction::Store ) ? static_cast<void> (0) : __assert_fail ("Opcode == Instruction::Load || Opcode == Instruction::Store" , "/build/llvm-toolchain-snapshot-9~svn361301/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp" , 154, __PRETTY_FUNCTION__)); | |||
155 | if (Opcode == Instruction::Store) | |||
156 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); | |||
157 | ||||
158 | if (Src->isVectorTy()) { | |||
159 | VectorType *VecTy = cast<VectorType>(Src); | |||
160 | unsigned VecWidth = VecTy->getBitWidth(); | |||
161 | if (useHVX() && isTypeForHVX(VecTy)) { | |||
162 | unsigned RegWidth = getRegisterBitWidth(true); | |||
163 | Alignment = std::min(Alignment, RegWidth/8); | |||
164 | // Cost of HVX loads. | |||
165 | if (VecWidth % RegWidth == 0) | |||
| ||||
166 | return VecWidth / RegWidth; | |||
167 | // Cost of constructing HVX vector from scalar loads. | |||
168 | unsigned AlignWidth = 8 * std::max(1u, Alignment); | |||
169 | unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; | |||
170 | return 3*NumLoads; | |||
171 | } | |||
172 | ||||
173 | // Non-HVX vectors. | |||
174 | // Add extra cost for floating point types. | |||
175 | unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor | |||
176 | : 1; | |||
177 | Alignment = std::min(Alignment, 8u); | |||
178 | unsigned AlignWidth = 8 * std::max(1u, Alignment); | |||
179 | unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; | |||
180 | if (Alignment == 4 || Alignment == 8) | |||
181 | return Cost * NumLoads; | |||
182 | // Loads of less than 32 bits will need extra inserts to compose a vector. | |||
183 | unsigned LogA = Log2_32(Alignment); | |||
184 | return (3 - LogA) * Cost * NumLoads; | |||
185 | } | |||
186 | ||||
187 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); | |||
188 | } | |||
189 | ||||
190 | unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, | |||
191 | Type *Src, unsigned Alignment, unsigned AddressSpace) { | |||
192 | return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); | |||
193 | } | |||
194 | ||||
195 | unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, | |||
196 | int Index, Type *SubTp) { | |||
197 | return 1; | |||
198 | } | |||
199 | ||||
200 | unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, | |||
201 | Value *Ptr, bool VariableMask, unsigned Alignment) { | |||
202 | return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, | |||
203 | Alignment); | |||
204 | } | |||
205 | ||||
206 | unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, | |||
207 | Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | |||
208 | unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, | |||
209 | bool UseMaskForGaps) { | |||
210 | if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps) | |||
| ||||
211 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
212 | Alignment, AddressSpace, | |||
213 | UseMaskForCond, UseMaskForGaps); | |||
214 | return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr); | |||
215 | } | |||
216 | ||||
217 | unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, | |||
218 | Type *CondTy, const Instruction *I) { | |||
219 | if (ValTy->isVectorTy()) { | |||
220 | std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy); | |||
221 | if (Opcode == Instruction::FCmp) | |||
222 | return LT.first + FloatFactor * getTypeNumElements(ValTy); | |||
223 | } | |||
224 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); | |||
225 | } | |||
226 | ||||
227 | unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, | |||
228 | TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, | |||
229 | TTI::OperandValueProperties Opd1PropInfo, | |||
230 | TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) { | |||
231 | if (Ty->isVectorTy()) { | |||
232 | std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty); | |||
233 | if (LT.second.isFloatingPoint()) | |||
234 | return LT.first + FloatFactor * getTypeNumElements(Ty); | |||
235 | } | |||
236 | return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, | |||
237 | Opd1PropInfo, Opd2PropInfo, Args); | |||
238 | } | |||
239 | ||||
240 | unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, | |||
241 | Type *SrcTy, const Instruction *I) { | |||
242 | if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) { | |||
243 | unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; | |||
244 | unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0; | |||
245 | ||||
246 | std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy); | |||
247 | std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy); | |||
248 | return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); | |||
249 | } | |||
250 | return 1; | |||
251 | } | |||
252 | ||||
253 | unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, | |||
254 | unsigned Index) { | |||
255 | Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType() | |||
256 | : Val; | |||
257 | if (Opcode == Instruction::InsertElement) { | |||
258 | // Need two rotations for non-zero index. | |||
259 | unsigned Cost = (Index != 0) ? 2 : 0; | |||
260 | if (ElemTy->isIntegerTy(32)) | |||
261 | return Cost; | |||
262 | // If it's not a 32-bit value, there will need to be an extract. | |||
263 | return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index); | |||
264 | } | |||
265 | ||||
266 | if (Opcode == Instruction::ExtractElement) | |||
267 | return 2; | |||
268 | ||||
269 | return 1; | |||
270 | } | |||
271 | ||||
272 | /// --- Vector TTI end --- | |||
273 | ||||
274 | unsigned HexagonTTIImpl::getPrefetchDistance() const { | |||
275 | return ST.getL1PrefetchDistance(); | |||
276 | } | |||
277 | ||||
278 | unsigned HexagonTTIImpl::getCacheLineSize() const { | |||
279 | return ST.getL1CacheLineSize(); | |||
280 | } | |||
281 | ||||
282 | int HexagonTTIImpl::getUserCost(const User *U, | |||
283 | ArrayRef<const Value *> Operands) { | |||
284 | auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { | |||
285 | if (!CI->isIntegerCast()) | |||
286 | return false; | |||
287 | // Only extensions from an integer type shorter than 32-bit to i32 | |||
288 | // can be folded into the load. | |||
289 | const DataLayout &DL = getDataLayout(); | |||
290 | unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy()); | |||
291 | unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy()); | |||
292 | if (DBW != 32 || SBW >= DBW) | |||
293 | return false; | |||
294 | ||||
295 | const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0)); | |||
296 | // Technically, this code could allow multiple uses of the load, and | |||
297 | // check if all the uses are the same extension operation, but this | |||
298 | // should be sufficient for most cases. | |||
299 | return LI && LI->hasOneUse(); | |||
300 | }; | |||
301 | ||||
302 | if (const CastInst *CI = dyn_cast<const CastInst>(U)) | |||
303 | if (isCastFoldedIntoLoad(CI)) | |||
304 | return TargetTransformInfo::TCC_Free; | |||
305 | return BaseT::getUserCost(U, Operands); | |||
306 | } | |||
307 | ||||
308 | bool HexagonTTIImpl::shouldBuildLookupTables() const { | |||
309 | return EmitLookupTables; | |||
310 | } |