File: include/llvm/Analysis/TargetTransformInfoImpl.h
Warning: line 694, column 52: Called C++ object pointer is null
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARMTargetTransformInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"

bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // To inline a callee, all features not in the whitelist must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
                    (CalleeBits & ~InlineFeatureWhitelist);
  // For features in the whitelist, the callee's features must be a subset of
  // the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
                     (CalleeBits & InlineFeatureWhitelist);
  return MatchExact && MatchSubset;
}

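// Estimate the number of instructions needed to materialize the immediate
// Imm in a register. As a rough sketch of the model below: directly
// encodable values cost 1, movw/movt pairs cost 2, and values that must be
// loaded from a constant pool cost 3.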
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)
    return 4;

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // Thumb1. Any i8 immediate costs 1.
  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
    return 1;
  if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
    return 2;
  // Load from constant pool.
  return 3;
}

// Constants smaller than 256 fit in the immediate field of Thumb1
// instructions, so we return a zero cost for them and 1 otherwise.
int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                      const APInt &Imm, Type *Ty) {
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
    return 0;

  return 1;
}

int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                              Type *Ty) {
  // Division by a constant can be turned into multiplication, but only if we
  // know it's constant. So it's not so much that the immediate is cheap (it's
  // not), but that the alternative is worse.
  // FIXME: this is probably unneeded with GlobalISel.
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      Idx == 1)
    return 0;

  if (Opcode == Instruction::And) {
    // UXTB/UXTH
    if (Imm == 255 || Imm == 65535)
      return 0;
    // Conversion to BIC is free, and means we can use ~Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
  }

  if (Opcode == Instruction::Add)
    // Conversion to SUB is free, and means we can use -Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1<<12)
      // icmp X, #-C -> cmn X, #C
      return 0;
    if (ST->isThumb() && NegImm < 1<<8)
      // icmp X, #-C -> adds X, #C
      return 0;
  }

  // xor a, -1 can always be folded to MVN.
  if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
    return 0;

  return getIntImmCost(Imm, Ty);
}

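// Cast costs. NEON provides cheap (often free) widening and narrowing for
// many vector types, so the tables below override the generic model; anything
// not covered falls back to BaseT::getCastInstrCost.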
int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Single to/from double precision conversions.
  static const CostTblEntry NEONFltDblTbl[] = {
    // Vector fptrunc/fpext conversions.
    { ISD::FP_ROUND, MVT::v2f64, 2 },
    { ISD::FP_EXTEND, MVT::v2f32, 2 },
    { ISD::FP_EXTEND, MVT::v4f32, 4 }
  };

  if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
                                             ISD == ISD::FP_EXTEND)) {
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  // Some arithmetic, load and store operations have specific instructions
  // to cast up/down their types automatically at no extra cost.
  // TODO: Get these tables to know at least what the related operations are.
  static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
    { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
    { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
    { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
    { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
    { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 0 },
    { ISD::TRUNCATE,    MVT::v4i16, MVT::v4i32, 1 },

    // The number of vmovl instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // Operations that we legalize using splitting.
    { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 6 },
    { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 3 },

    // Vector float <-> i32 conversions.
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },

    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },

    { ISD::FP_TO_SINT,  MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32, 3 },
    { ISD::FP_TO_UINT,  MVT::v4i8, MVT::v4f32, 3 },
    { ISD::FP_TO_SINT,  MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT,  MVT::v4i16, MVT::v4f32, 2 },

    // Vector double <-> i32 conversions.
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },

    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },

    { ISD::FP_TO_SINT,  MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 4 },
    { ISD::FP_TO_UINT,  MVT::v8i16, MVT::v8f32, 4 },
    { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f32, 8 },
    { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 8 }
  };

  if (SrcTy.isVector() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar float to integer conversions.
  static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
    { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
    { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
    { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
    { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
  };
  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer to float conversions.
  static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
    { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
  };

  if (SrcTy.isInteger() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer conversion costs.
  static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
    // i16 -> i64 requires two dependent operations.
    { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },

    // Truncates on i64 are assumed to be free.
    { ISD::TRUNCATE,    MVT::i32, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i16, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i8,  MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i1,  MVT::i64, 0 }
  };

  if (SrcTy.isInteger()) {
    if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                   unsigned Index) {
  // Penalize inserting into a D-subregister. We end up with a three times
  // lower estimated throughput on Swift.
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
    return 3;

  if ((Opcode == Instruction::InsertElement ||
       Opcode == Instruction::ExtractElement)) {
    // Cross-class copies are expensive on many microarchitectures,
    // so assume they are expensive by default.
    if (ValTy->getVectorElementType()->isIntegerTy())
      return 3;

    // Even if it's not a cross-class copy, this likely leads to mixing
    // of NEON and VFP code and should therefore be penalized.
    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
  }

  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}

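// Compare/select costs. NEON vector selects lower to a single vbsl, but a few
// wide i64-element cases are currently expanded into long sequences and are
// priced accordingly in the table below.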
int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // On NEON a vector select gets lowered to vbsl.
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // Lowering of some vector selects is currently far from perfect.
    static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
    return LT.first;
  }

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where
  // the computation can more often be merged into the index mode. The
  // resulting extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

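// Model the cost of an IR memcpy either as a library call or, when the size
// is a known constant, as the load/store sequence the backend would emit for
// an inline expansion.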
int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
  const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
  assert(MI && "MemcpyInst expected");
  ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());

  // To model the cost of a library call, we assume 1 for the call, and
  // 3 for the argument setup.
  const unsigned LibCallCost = 4;

  // If 'size' is not a constant, a library call will be generated.
  if (!C)
    return LibCallCost;

  const unsigned Size = C->getValue().getZExtValue();
  const unsigned DstAlign = MI->getDestAlignment();
  const unsigned SrcAlign = MI->getSourceAlignment();
  const Function *F = I->getParent()->getParent();
  const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
  std::vector<EVT> MemOps;

  // MemOps will be populated with a list of data types that need to be
  // loaded and stored. That's why we multiply the number of elements by 2
  // to get the cost for this memcpy.
  if (getTLI()->findOptimalMemOpLowering(
          MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
          false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
          MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
          F->getAttributes()))
    return MemOps.size() * 2;

  // If we can't find an optimal memop lowering, return the default cost.
  return LibCallCost;
}

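// Shuffle costs. Broadcasts map to vdup, reverses to vrev (plus vext for quad
// registers), and select shuffles to short permute sequences; other shuffle
// kinds use the base implementation.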
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                               Type *SubTp) {
  if (Kind == TTI::SK_Broadcast) {
    static const CostTblEntry NEONDupTbl[] = {
        // VDUP handles these cases.
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  if (Kind == TTI::SK_Reverse) {
    static const CostTblEntry NEONShuffleTbl[] = {
        // Reverse shuffles cost one instruction if we are shuffling within a
        // double word (vrev) or two if we shuffle a quad word (vrev, vext).
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  if (Kind == TTI::SK_Select) {
    static const CostTblEntry NEONSelShuffleTbl[] = {
        // Select shuffle cost table for ARM. Cost is the number of
        // instructions required to create the shuffled vector.

        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},

        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},

        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
                                            ISD::VECTOR_SHUFFLE, LT.second))
      return LT.first * Entry->Cost;
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}

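// Arithmetic costs. There is no NEON integer division, so vector divisions
// are modelled as per-element library calls (or a cheaper reciprocal-estimate
// sequence for small element types), which is why the table below assigns
// deliberately large costs.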
int ARMTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) {
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  const unsigned FunctionCallDivCost = 20;
  const unsigned ReciprocalDivCost = 10;
  static const CostTblEntry CostTbl[] = {
    // Division.
    // These costs are somewhat arbitrary. Choose a cost of 20 to indicate
    // that vectorizing division (which adds a function call) is going to be
    // very expensive.
    // Double register types.
    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
    // Quad register types.
    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
    // Multiplication.
  };

  if (ST->hasNEON())
    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
                                           Opd1PropInfo, Opd2PropInfo);

  // This is somewhat of a hack. The problem we are facing is that SROA
  // creates a sequence of shift, and, or instructions to construct values.
  // These sequences are recognized by the ISel and have zero cost. Not so
  // for the vectorized code. Because we have support for v2i64 but not i64,
  // those sequences look particularly beneficial to vectorize.
  // To work around this we increase the cost of v2i64 operations to make
  // them seem less beneficial.
  if (LT.second == MVT::v2i64 &&
      Op2Info == TargetTransformInfo::OK_UniformConstantValue)
    Cost += 4;

  return Cost;
}

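// Memory op costs. Unaligned NEON accesses to vectors of f64 are penalized
// because they need multi-uop vst1/vld1 sequences instead of vldr/vstr.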
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                unsigned AddressSpace, const Instruction *I) {
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  if (Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr.
    return LT.first * 4;
  }
  return LT.first;
}

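// Interleaved access costs: factors that map directly onto vldN/vstN are
// charged Factor times the number of vldN/vstN instructions required;
// everything else (masked accesses, 64-bit elements) falls back to the
// scalarizing base model.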
int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                           unsigned Factor,
                                           ArrayRef<unsigned> Indices,
                                           unsigned Alignment,
                                           unsigned AddressSpace,
                                           bool UseMaskForCond,
                                           bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  // vldN/vstN doesn't support vector types of i64/f64 elements.
  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // vldN/vstN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one vldN/vstN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace,
                                           UseMaskForCond, UseMaskForGaps);
}

void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  // Currently, only enable these preferences for M-Class cores.
  if (!ST->isMClass())
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  // Only enable on Thumb-2 targets.
  if (!ST->isThumb2())
    return;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow one exit other than the latch. This acts as an early exit
  // as it mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining.
  unsigned Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        ImmutableCallSite CS(&I);
        if (const Function *F = CS.getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }
      SmallVector<const Value*, 4> Operands(I.value_op_begin(),
                                            I.value_op_end());
      Cost += getUserCost(&I, Operands);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.DefaultUnrollRuntimeCount = 4;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Force-unrolling small loops can be very useful because of the
  // branch-taken cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}
//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is large enough to
      // store the pointer, and a legal integer type.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be
    // folded into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) {
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  unsigned getMemcpyCost(const Instruction *I) {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType) { return false; }

  bool isLegalMaskedLoad(Type *DataType) { return false; }

  bool isLegalMaskedScatter(Type *DataType) { return false; }

  bool isLegalMaskedGather(Type *DataType) { return false; }

  bool isLegalMaskedCompressStore(Type *DataType) { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  unsigned getJumpBufAlignment() { return 0; }

  unsigned getJumpBufSize() { return 0; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const {
    return nullptr;
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool enableMaskedInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(bool Vector) { return 8; }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() { return 0; }

  unsigned getMinPrefetchStride() { return 1; }

  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

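  // The hooks below provide conservative defaults: most cost queries return a
  // flat cost of one basic unit (or zero where the operation is typically
  // folded away), and targets override them with real models.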
372 | unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, | |||
373 | TTI::OperandValueKind Opd1Info, | |||
374 | TTI::OperandValueKind Opd2Info, | |||
375 | TTI::OperandValueProperties Opd1PropInfo, | |||
376 | TTI::OperandValueProperties Opd2PropInfo, | |||
377 | ArrayRef<const Value *> Args) { | |||
378 | return 1; | |||
379 | } | |||
380 | ||||
381 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index, | |||
382 | Type *SubTp) { | |||
383 | return 1; | |||
384 | } | |||
385 | ||||
386 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | |||
387 | const Instruction *I) { return 1; } | |||
388 | ||||
389 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, | |||
390 | VectorType *VecTy, unsigned Index) { | |||
391 | return 1; | |||
392 | } | |||
393 | ||||
394 | unsigned getCFInstrCost(unsigned Opcode) { return 1; } | |||
395 | ||||
396 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | |||
397 | const Instruction *I) { | |||
398 | return 1; | |||
399 | } | |||
400 | ||||
401 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | |||
402 | return 1; | |||
403 | } | |||
404 | ||||
405 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, | |||
406 | unsigned AddressSpace, const Instruction *I) { | |||
407 | return 1; | |||
408 | } | |||
409 | ||||
410 | unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, | |||
411 | unsigned AddressSpace) { | |||
412 | return 1; | |||
413 | } | |||
414 | ||||
415 | unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, | |||
416 | bool VariableMask, | |||
417 | unsigned Alignment) { | |||
418 | return 1; | |||
419 | } | |||
420 | ||||
421 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, | |||
422 | unsigned Factor, | |||
423 | ArrayRef<unsigned> Indices, | |||
424 | unsigned Alignment, unsigned AddressSpace, | |||
425 | bool UseMaskForCond = false, | |||
426 | bool UseMaskForGaps = false) { | |||
427 | return 1; | |||
428 | } | |||
429 | ||||
430 | unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | |||
431 | ArrayRef<Type *> Tys, FastMathFlags FMF, | |||
432 | unsigned ScalarizationCostPassed) { | |||
433 | return 1; | |||
434 | } | |||
435 | unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | |||
436 | ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) { | |||
437 | return 1; | |||
438 | } | |||
439 | ||||
440 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { | |||
441 | return 1; | |||
442 | } | |||
443 | ||||
444 | unsigned getNumberOfParts(Type *Tp) { return 0; } | |||
445 | ||||
446 | unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, | |||
447 | const SCEV *) { | |||
448 | return 0; | |||
449 | } | |||
450 | ||||
451 | unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; } | |||
452 | ||||
453 | unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; } | |||
454 | ||||
455 | unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } | |||
456 | ||||
457 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { | |||
458 | return false; | |||
459 | } | |||
460 | ||||
461 | unsigned getAtomicMemIntrinsicMaxElementSize() const { | |||
462 | // Note for overrides: You must ensure for all element unordered-atomic | |||
463 | // memory intrinsics that all power-of-2 element sizes up to, and | |||
464 | // including, the return value of this method have a corresponding | |||
465 | // runtime lib call. These runtime lib call definitions can be found | |||
466 | // in RuntimeLibcalls.h | |||
467 | return 0; | |||
468 | } | |||
469 | ||||
470 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | |||
471 | Type *ExpectedType) { | |||
472 | return nullptr; | |||
473 | } | |||
474 | ||||
475 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, | |||
476 | unsigned SrcAlign, unsigned DestAlign) const { | |||
477 | return Type::getInt8Ty(Context); | |||
478 | } | |||
479 | ||||
480 | void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, | |||
481 | LLVMContext &Context, | |||
482 | unsigned RemainingBytes, | |||
483 | unsigned SrcAlign, | |||
484 | unsigned DestAlign) const { | |||
485 | for (unsigned i = 0; i != RemainingBytes; ++i) | |||
486 | OpsOut.push_back(Type::getInt8Ty(Context)); | |||
487 | } | |||
488 | ||||
489 | bool areInlineCompatible(const Function *Caller, | |||
490 | const Function *Callee) const { | |||
491 | return (Caller->getFnAttribute("target-cpu") == | |||
492 | Callee->getFnAttribute("target-cpu")) && | |||
493 | (Caller->getFnAttribute("target-features") == | |||
494 | Callee->getFnAttribute("target-features")); | |||
495 | } | |||
496 | ||||
497 | bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, | |||
498 | SmallPtrSetImpl<Argument *> &Args) const { | |||
499 | return (Caller->getFnAttribute("target-cpu") == | |||
500 | Callee->getFnAttribute("target-cpu")) && | |||
501 | (Caller->getFnAttribute("target-features") == | |||
502 | Callee->getFnAttribute("target-features")); | |||
503 | } | |||
504 | ||||
505 | bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, | |||
506 | const DataLayout &DL) const { | |||
507 | return false; | |||
508 | } | |||
509 | ||||
510 | bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, | |||
511 | const DataLayout &DL) const { | |||
512 | return false; | |||
513 | } | |||
514 | ||||
515 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } | |||
516 | ||||
517 | bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } | |||
518 | ||||
519 | bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } | |||
520 | ||||
521 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | |||
522 | unsigned Alignment, | |||
523 | unsigned AddrSpace) const { | |||
524 | return true; | |||
525 | } | |||
526 | ||||
527 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | |||
528 | unsigned Alignment, | |||
529 | unsigned AddrSpace) const { | |||
530 | return true; | |||
531 | } | |||
532 | ||||
533 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | |||
534 | unsigned ChainSizeInBytes, | |||
535 | VectorType *VecTy) const { | |||
536 | return VF; | |||
537 | } | |||
538 | ||||
539 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | |||
540 | unsigned ChainSizeInBytes, | |||
541 | VectorType *VecTy) const { | |||
542 | return VF; | |||
543 | } | |||
544 | ||||
545 | bool useReductionIntrinsic(unsigned Opcode, Type *Ty, | |||
546 | TTI::ReductionFlags Flags) const { | |||
547 | return false; | |||
548 | } | |||
549 | ||||
550 | bool shouldExpandReduction(const IntrinsicInst *II) const { | |||
551 | return true; | |||
552 | } | |||
553 | ||||
554 | protected: | |||
555 | // Obtain the minimum required size to hold the value (without the sign). | |||
556 | // In case of a vector, it returns the min required size of one element. | |||
557 | unsigned minRequiredElementSize(const Value* Val, bool &isSigned) { | |||
558 | if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { | |||
559 | const auto* VectorValue = cast<Constant>(Val); | |||
560 | ||||
561 | // In case of a vector we need to pick the max over the min | |||
562 | // required sizes of the elements. | |||
563 | auto *VT = cast<VectorType>(Val->getType()); | |||
564 | ||||
565 | // Assume unsigned elements | |||
566 | isSigned = false; | |||
567 | ||||
568 | // The max required size is the total vector width divided by the | |||
569 | // number of elements in the vector. | |||
570 | unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements(); | |||
571 | ||||
572 | unsigned MinRequiredSize = 0; | |||
573 | for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { | |||
574 | if (auto* IntElement = | |||
575 | dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { | |||
576 | bool signedElement = IntElement->getValue().isNegative(); | |||
577 | // Get the element min required size. | |||
578 | unsigned ElementMinRequiredSize = | |||
579 | IntElement->getValue().getMinSignedBits() - 1; | |||
580 | // In case one element is signed then all the vector is signed. | |||
581 | isSigned |= signedElement; | |||
582 | // Save the max required bit size between all the elements. | |||
583 | MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); | |||
584 | } | |||
585 | else { | |||
586 | // not an int constant element | |||
587 | return MaxRequiredSize; | |||
588 | } | |||
589 | } | |||
590 | return MinRequiredSize; | |||
591 | } | |||
592 | ||||
593 | if (const auto* CI = dyn_cast<ConstantInt>(Val)) { | |||
594 | isSigned = CI->getValue().isNegative(); | |||
595 | return CI->getValue().getMinSignedBits() - 1; | |||
596 | } | |||
597 | ||||
598 | if (const auto* Cast = dyn_cast<SExtInst>(Val)) { | |||
599 | isSigned = true; | |||
600 | return Cast->getSrcTy()->getScalarSizeInBits() - 1; | |||
601 | } | |||
602 | ||||
603 | if (const auto* Cast = dyn_cast<ZExtInst>(Val)) { | |||
604 | isSigned = false; | |||
605 | return Cast->getSrcTy()->getScalarSizeInBits(); | |||
606 | } | |||
607 | ||||
608 | isSigned = false; | |||
609 | return Val->getType()->getScalarSizeInBits(); | |||
610 | } | |||
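
As an illustrative aside (not part of the header): a minimal standalone sketch of the "width without the sign bit" convention used by minRequiredElementSize() above, exercising APInt directly. The values are mine, and building it assumes the LLVM headers and support library are available.

#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  llvm::APInt Pos(32, 100);
  llvm::APInt Neg(32, -100, /*isSigned=*/true);
  // getMinSignedBits() counts the sign bit, so subtracting one yields the
  // magnitude width, matching minRequiredElementSize() above.
  std::printf("%u %u\n", Pos.getMinSignedBits() - 1,
              Neg.getMinSignedBits() - 1); // prints "7 7"
  return 0;
}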
611 | ||||
612 | bool isStridedAccess(const SCEV *Ptr) { | |||
613 | return Ptr && isa<SCEVAddRecExpr>(Ptr); | |||
614 | } | |||
615 | ||||
616 | const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, | |||
617 | const SCEV *Ptr) { | |||
618 | if (!isStridedAccess(Ptr)) | |||
619 | return nullptr; | |||
620 | const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); | |||
621 | return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); | |||
622 | } | |||
623 | ||||
624 | bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, | |||
625 | int64_t MergeDistance) { | |||
626 | const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); | |||
627 | if (!Step) | |||
628 | return false; | |||
629 | APInt StrideVal = Step->getAPInt(); | |||
630 | if (StrideVal.getBitWidth() > 64) | |||
631 | return false; | |||
632 | // FIXME: Need to take absolute value for negative stride case. | |||
633 | return StrideVal.getSExtValue() < MergeDistance; | |||
634 | } | |||
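
For intuition, a comment-only sketch of the check above with assumed numbers, including the false positive the FIXME warns about:

// stride = 12,  MergeDistance = 16  ->  12 < 16, accesses may merge
// stride = -12, MergeDistance = 16  ->  -12 < 16 also passes the signed
//                                       comparison, even though the
//                                       accesses move apart (see FIXME)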
635 | }; | |||
636 | ||||
637 | /// CRTP base class for use as a mix-in that aids implementing | |||
638 | /// a TargetTransformInfo-compatible class. | |||
639 | template <typename T> | |||
640 | class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { | |||
641 | private: | |||
642 | typedef TargetTransformInfoImplBase BaseT; | |||
643 | ||||
644 | protected: | |||
645 | explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} | |||
646 | ||||
647 | public: | |||
648 | using BaseT::getCallCost; | |||
649 | ||||
650 | unsigned getCallCost(const Function *F, int NumArgs, const User *U) { | |||
651 | assert(F && "A concrete function must be provided to this routine."); | |||
652 | ||||
653 | if (NumArgs < 0) | |||
654 | // Set the argument number to the number of explicit arguments in the | |||
655 | // function. | |||
656 | NumArgs = F->arg_size(); | |||
657 | ||||
658 | if (Intrinsic::ID IID = F->getIntrinsicID()) { | |||
659 | FunctionType *FTy = F->getFunctionType(); | |||
660 | SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); | |||
661 | return static_cast<T *>(this) | |||
662 | ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U); | |||
663 | } | |||
664 | ||||
665 | if (!static_cast<T *>(this)->isLoweredToCall(F)) | |||
666 | return TTI::TCC_Basic; // Give a basic cost if it will be lowered | |||
667 | // directly. | |||
668 | ||||
669 | return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U); | |||
670 | } | |||
671 | ||||
672 | unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments, | |||
673 | const User *U) { | |||
674 | // Simply delegate to generic handling of the call. | |||
675 | // FIXME: We should use instsimplify or something else to catch calls which | |||
676 | // will constant fold with these arguments. | |||
677 | return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U); | |||
678 | } | |||
679 | ||||
680 | using BaseT::getGEPCost; | |||
681 | ||||
682 | int getGEPCost(Type *PointeeType, const Value *Ptr, | |||
683 | ArrayRef<const Value *> Operands) { | |||
684 | const GlobalValue *BaseGV = nullptr; | |||
685 | if (Ptr != nullptr) { | |||
686 | // TODO: will remove this when pointers have an opaque type. | |||
687 | assert(Ptr->getType()->getScalarType()->getPointerElementType() == | |||
688 | PointeeType && | |||
689 | "explicit pointee type doesn't match operand's pointee type"); | |||
690 | BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); | |||
691 | } | |||
692 | bool HasBaseReg = (BaseGV == nullptr); | |||
693 | ||||
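// Note: Ptr is dereferenced unconditionally on the next line, even though
// the guard above allows Ptr to be null; a null Ptr would make the
// Ptr->getType() call below invoke a member function on a null pointer.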
694 | auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); | |||
| ||||
695 | APInt BaseOffset(PtrSizeBits, 0); | |||
696 | int64_t Scale = 0; | |||
697 | ||||
698 | auto GTI = gep_type_begin(PointeeType, Operands); | |||
699 | Type *TargetType = nullptr; | |||
700 | ||||
701 | // Handle the case where the GEP has only the base pointer operand, | |||
702 | // in which case TargetType remains nullptr. | |||
703 | if (Operands.empty()) | |||
704 | return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; | |||
705 | ||||
706 | for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { | |||
707 | TargetType = GTI.getIndexedType(); | |||
708 | // We assume that the cost of Scalar GEP with constant index and the | |||
709 | // cost of Vector GEP with splat constant index are the same. | |||
710 | const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); | |||
711 | if (!ConstIdx) | |||
712 | if (auto Splat = getSplatValue(*I)) | |||
713 | ConstIdx = dyn_cast<ConstantInt>(Splat); | |||
714 | if (StructType *STy = GTI.getStructTypeOrNull()) { | |||
715 | // For structures the index is always a splat or scalar constant. | |||
716 | assert(ConstIdx && "Unexpected GEP index"); | |||
717 | uint64_t Field = ConstIdx->getZExtValue(); | |||
718 | BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); | |||
719 | } else { | |||
720 | int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); | |||
721 | if (ConstIdx) { | |||
722 | BaseOffset += | |||
723 | ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; | |||
724 | } else { | |||
725 | // Needs scale register. | |||
726 | if (Scale != 0) | |||
727 | // No addressing mode takes two scale registers. | |||
728 | return TTI::TCC_Basic; | |||
729 | Scale = ElementSize; | |||
730 | } | |||
731 | } | |||
732 | } | |||
733 | ||||
734 | // Assumes the address space is 0 when Ptr is nullptr. | |||
735 | unsigned AS = | |||
736 | (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); | |||
737 | ||||
738 | if (static_cast<T *>(this)->isLegalAddressingMode( | |||
739 | TargetType, const_cast<GlobalValue *>(BaseGV), | |||
740 | BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS)) | |||
741 | return TTI::TCC_Free; | |||
742 | return TTI::TCC_Basic; | |||
743 | } | |||
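
A minimal standalone illustration (my example, using host-ABI offsets from the compiler rather than DataLayout) of how constant GEP indices fold into BaseOffset above:

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct S { int32_t A; double B[4]; };

int main() {
  // Mirrors GEP %p, 0, 1, 3: the leading index scales by sizeof(S), the
  // struct field adds its layout offset, the array index scales by the
  // element size.
  int64_t BaseOffset = 0 * (int64_t)sizeof(S) +
                       (int64_t)offsetof(S, B) +
                       3 * (int64_t)sizeof(double);
  std::printf("%lld\n", (long long)BaseOffset); // 32 on common LP64 targets
  // If the folded offset fits a legal addressing mode, the whole GEP is
  // modelled as TTI::TCC_Free.
  return 0;
}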
744 | ||||
745 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, | |||
746 | ArrayRef<Type *> ParamTys, const User *U) { | |||
747 | switch (IID) { | |||
748 | default: | |||
749 | // Intrinsics rarely (if ever) have normal argument setup constraints. | |||
750 | // Model them as having a basic instruction cost. | |||
751 | return TTI::TCC_Basic; | |||
752 | ||||
753 | // TODO: other libc intrinsics. | |||
754 | case Intrinsic::memcpy: | |||
755 | return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U)); | |||
756 | ||||
757 | case Intrinsic::annotation: | |||
758 | case Intrinsic::assume: | |||
759 | case Intrinsic::sideeffect: | |||
760 | case Intrinsic::dbg_declare: | |||
761 | case Intrinsic::dbg_value: | |||
762 | case Intrinsic::dbg_label: | |||
763 | case Intrinsic::invariant_start: | |||
764 | case Intrinsic::invariant_end: | |||
765 | case Intrinsic::launder_invariant_group: | |||
766 | case Intrinsic::strip_invariant_group: | |||
767 | case Intrinsic::is_constant: | |||
768 | case Intrinsic::lifetime_start: | |||
769 | case Intrinsic::lifetime_end: | |||
770 | case Intrinsic::objectsize: | |||
771 | case Intrinsic::ptr_annotation: | |||
772 | case Intrinsic::var_annotation: | |||
773 | case Intrinsic::experimental_gc_result: | |||
774 | case Intrinsic::experimental_gc_relocate: | |||
775 | case Intrinsic::coro_alloc: | |||
776 | case Intrinsic::coro_begin: | |||
777 | case Intrinsic::coro_free: | |||
778 | case Intrinsic::coro_end: | |||
779 | case Intrinsic::coro_frame: | |||
780 | case Intrinsic::coro_size: | |||
781 | case Intrinsic::coro_suspend: | |||
782 | case Intrinsic::coro_param: | |||
783 | case Intrinsic::coro_subfn_addr: | |||
784 | // These intrinsics don't actually represent code after lowering. | |||
785 | return TTI::TCC_Free; | |||
786 | } | |||
787 | } | |||
788 | ||||
789 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, | |||
790 | ArrayRef<const Value *> Arguments, const User *U) { | |||
791 | // Delegate to the generic intrinsic handling code. This mostly provides an | |||
792 | // opportunity for targets to (for example) special case the cost of | |||
793 | // certain intrinsics based on constants used as arguments. | |||
794 | SmallVector<Type *, 8> ParamTys; | |||
795 | ParamTys.reserve(Arguments.size()); | |||
796 | for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) | |||
797 | ParamTys.push_back(Arguments[Idx]->getType()); | |||
798 | return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U); | |||
799 | } | |||
800 | ||||
801 | unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) { | |||
802 | if (isa<PHINode>(U)) | |||
803 | return TTI::TCC_Free; // Model all PHI nodes as free. | |||
804 | ||||
805 | // Static alloca doesn't generate target instructions. | |||
806 | if (auto *A = dyn_cast<AllocaInst>(U)) | |||
807 | if (A->isStaticAlloca()) | |||
808 | return TTI::TCC_Free; | |||
809 | ||||
810 | if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { | |||
811 | return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), | |||
812 | GEP->getPointerOperand(), | |||
813 | Operands.drop_front()); | |||
814 | } | |||
815 | ||||
816 | if (auto CS = ImmutableCallSite(U)) { | |||
817 | const Function *F = CS.getCalledFunction(); | |||
818 | if (!F) { | |||
819 | // Just use the called value type. | |||
820 | Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); | |||
821 | return static_cast<T *>(this) | |||
822 | ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U); | |||
823 | } | |||
824 | ||||
825 | SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); | |||
826 | return static_cast<T *>(this)->getCallCost(F, Arguments, U); | |||
827 | } | |||
828 | ||||
829 | if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U)) | |||
830 | // The old behaviour of generally treating extensions of icmp to be free | |||
831 | // has been removed. A target that needs it should override getUserCost(). | |||
832 | return static_cast<T *>(this)->getExtCost(cast<Instruction>(U), | |||
833 | Operands.back()); | |||
834 | ||||
835 | return static_cast<T *>(this)->getOperationCost( | |||
836 | Operator::getOpcode(U), U->getType(), | |||
837 | U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); | |||
838 | } | |||
839 | ||||
840 | int getInstructionLatency(const Instruction *I) { | |||
841 | SmallVector<const Value *, 4> Operands(I->value_op_begin(), | |||
842 | I->value_op_end()); | |||
843 | if (getUserCost(I, Operands) == TTI::TCC_Free) | |||
844 | return 0; | |||
845 | ||||
846 | if (isa<LoadInst>(I)) | |||
847 | return 4; | |||
848 | ||||
849 | Type *DstTy = I->getType(); | |||
850 | ||||
851 | // Usually an intrinsic is a simple instruction. | |||
852 | // A real function call is much slower. | |||
853 | if (auto *CI = dyn_cast<CallInst>(I)) { | |||
854 | const Function *F = CI->getCalledFunction(); | |||
855 | if (!F || static_cast<T *>(this)->isLoweredToCall(F)) | |||
856 | return 40; | |||
857 | // Some intrinsics return a value and a flag; we use the value type | |||
858 | // to decide its latency. | |||
859 | if (StructType* StructTy = dyn_cast<StructType>(DstTy)) | |||
860 | DstTy = StructTy->getElementType(0); | |||
861 | // Fall through to simple instructions. | |||
862 | } | |||
863 | ||||
864 | if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) | |||
865 | DstTy = VectorTy->getElementType(); | |||
866 | if (DstTy->isFloatingPointTy()) | |||
867 | return 3; | |||
868 | ||||
869 | return 1; | |||
870 | } | |||
871 | }; | |||
872 | } | |||
873 | ||||
874 | #endif |
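
Before the next header: the heuristic in getInstructionLatency() above boils down to a small table. Restated as a self-contained sketch (the enum and function names are mine, not LLVM API):

enum class RoughKind { FreeUser, Load, RealCall, FloatOp, Other };

int roughLatency(RoughKind K) {
  switch (K) {
  case RoughKind::FreeUser: return 0;  // user cost was TCC_Free
  case RoughKind::Load:     return 4;
  case RoughKind::RealCall: return 40; // lowered to an actual call
  case RoughKind::FloatOp:  return 3;  // FP-typed (or FP vector element) result
  case RoughKind::Other:    return 1;
  }
  return 1;
}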
1 | //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file provides a helper that implements much of the TTI interface in |
11 | /// terms of the target-independent code generator and TargetLowering |
12 | /// interfaces. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_CODEGEN_BASICTTIIMPL_H |
17 | #define LLVM_CODEGEN_BASICTTIIMPL_H |
18 | |
19 | #include "llvm/ADT/APInt.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/Analysis/LoopInfo.h" |
25 | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | #include "llvm/Analysis/TargetTransformInfoImpl.h" |
27 | #include "llvm/CodeGen/ISDOpcodes.h" |
28 | #include "llvm/CodeGen/TargetLowering.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/CodeGen/ValueTypes.h" |
31 | #include "llvm/IR/BasicBlock.h" |
32 | #include "llvm/IR/CallSite.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/DerivedTypes.h" |
37 | #include "llvm/IR/InstrTypes.h" |
38 | #include "llvm/IR/Instruction.h" |
39 | #include "llvm/IR/Instructions.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Type.h" |
43 | #include "llvm/IR/Value.h" |
44 | #include "llvm/MC/MCSchedule.h" |
45 | #include "llvm/Support/Casting.h" |
46 | #include "llvm/Support/CommandLine.h" |
47 | #include "llvm/Support/ErrorHandling.h" |
48 | #include "llvm/Support/MachineValueType.h" |
49 | #include "llvm/Support/MathExtras.h" |
50 | #include <algorithm> |
51 | #include <cassert> |
52 | #include <cstdint> |
53 | #include <limits> |
54 | #include <utility> |
55 | |
56 | namespace llvm { |
57 | |
58 | class Function; |
59 | class GlobalValue; |
60 | class LLVMContext; |
61 | class ScalarEvolution; |
62 | class SCEV; |
63 | class TargetMachine; |
64 | |
65 | extern cl::opt<unsigned> PartialUnrollingThreshold; |
66 | |
67 | /// Base class which can be used to help build a TTI implementation. |
68 | /// |
69 | /// This class provides as much implementation of the TTI interface as is |
70 | /// possible using the target independent parts of the code generator. |
71 | /// |
72 | /// In order to subclass it, your class must implement a getST() method to |
73 | /// return the subtarget, and a getTLI() method to return the target lowering. |
74 | /// We need these methods implemented in the derived class so that this class |
75 | /// doesn't have to duplicate storage for them. |
76 | template <typename T> |
77 | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { |
78 | private: |
79 | using BaseT = TargetTransformInfoImplCRTPBase<T>; |
80 | using TTI = TargetTransformInfo; |
81 | |
82 | /// Estimate a cost of Broadcast as an extract and sequence of insert |
83 | /// operations. |
84 | unsigned getBroadcastShuffleOverhead(Type *Ty) { |
85 | assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 | unsigned Cost = 0; |
87 | // Broadcast cost is equal to the cost of extracting the zero'th element |
88 | // plus the cost of inserting it into every element of the result vector. |
89 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
90 | Instruction::ExtractElement, Ty, 0); |
91 | |
92 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
93 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
94 | Instruction::InsertElement, Ty, i); |
95 | } |
96 | return Cost; |
97 | } |
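
In other words, assuming unit per-element insert/extract costs (an assumption; targets typically override getVectorInstrCost), the broadcast model above reduces to:

// 1 extract of lane 0 plus one insert per result lane.
unsigned broadcastCostSketch(unsigned NumElts) { return 1 + NumElts; }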
98 | |
99 | /// Estimate a cost of shuffle as a sequence of extract and insert |
100 | /// operations. |
101 | unsigned getPermuteShuffleOverhead(Type *Ty) { |
102 | assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 | unsigned Cost = 0; |
104 | // Shuffle cost is equal to the cost of extracting each element from its
105 | // source vector plus the cost of inserting it into the result vector.
106 | |
107 | // e.g. a <4 x float> shuffle with mask <0,5,2,7> extracts from
108 | // index 0 of the first vector, index 1 of the second vector, index 2
109 | // of the first vector, and finally index 3 of the second vector, and
110 | // inserts them at indices <0,1,2,3> of the result vector.
111 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
112 | Cost += static_cast<T *>(this) |
113 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
114 | Cost += static_cast<T *>(this) |
115 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
116 | } |
117 | return Cost; |
118 | } |
119 | |
120 | /// Estimate a cost of subvector extraction as a sequence of extract and |
121 | /// insert operations. |
122 | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
123 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124 | "Can only extract subvectors from vectors");
125 | int NumSubElts = SubTy->getVectorNumElements(); |
126 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127 | "SK_ExtractSubvector index out of range");
128 | |
129 | unsigned Cost = 0; |
130 | // Subvector extraction cost is equal to the cost of extracting each
131 | // element from the source vector plus the cost of inserting it into
132 | // the result (subvector) type.
133 | for (int i = 0; i != NumSubElts; ++i) { |
134 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
135 | Instruction::ExtractElement, Ty, i + Index); |
136 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
137 | Instruction::InsertElement, SubTy, i); |
138 | } |
139 | return Cost; |
140 | } |
141 | |
142 | /// Estimate a cost of subvector insertion as a sequence of extract and |
143 | /// insert operations. |
144 | unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
145 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146 | "Can only insert subvectors into vectors");
147 | int NumSubElts = SubTy->getVectorNumElements(); |
148 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149 | "SK_InsertSubvector index out of range");
150 | |
151 | unsigned Cost = 0; |
152 | // Subvector insertion cost is equal to the cost of extracting each
153 | // element from the subvector type plus the cost of inserting it into
154 | // the full result vector type.
155 | for (int i = 0; i != NumSubElts; ++i) { |
156 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
157 | Instruction::ExtractElement, SubTy, i); |
158 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
159 | Instruction::InsertElement, Ty, i + Index); |
160 | } |
161 | return Cost; |
162 | } |
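
Under the same unit-cost assumption, both subvector helpers above cost two operations per subvector element; e.g. moving a <2 x float> in or out of an <8 x float> is modelled as 4 operations:

unsigned subvectorShuffleCostSketch(unsigned NumSubElts) {
  return 2 * NumSubElts; // one extract plus one insert per element
}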
163 | |
164 | /// Local query method delegates up to T which *must* implement this! |
165 | const TargetSubtargetInfo *getST() const { |
166 | return static_cast<const T *>(this)->getST(); |
167 | } |
168 | |
169 | /// Local query method delegates up to T which *must* implement this! |
170 | const TargetLoweringBase *getTLI() const { |
171 | return static_cast<const T *>(this)->getTLI(); |
172 | } |
173 | |
174 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { |
175 | switch (M) { |
176 | case TTI::MIM_Unindexed: |
177 | return ISD::UNINDEXED; |
178 | case TTI::MIM_PreInc: |
179 | return ISD::PRE_INC; |
180 | case TTI::MIM_PreDec: |
181 | return ISD::PRE_DEC; |
182 | case TTI::MIM_PostInc: |
183 | return ISD::POST_INC; |
184 | case TTI::MIM_PostDec: |
185 | return ISD::POST_DEC; |
186 | } |
187 | llvm_unreachable("Unexpected MemIndexedMode");
188 | } |
189 | |
190 | protected: |
191 | explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) |
192 | : BaseT(DL) {} |
193 | |
194 | using TargetTransformInfoImplBase::DL; |
195 | |
196 | public: |
197 | /// \name Scalar TTI Implementations |
198 | /// @{ |
199 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
200 | unsigned BitWidth, unsigned AddressSpace, |
201 | unsigned Alignment, bool *Fast) const { |
202 | EVT E = EVT::getIntegerVT(Context, BitWidth); |
203 | return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast); |
204 | } |
205 | |
206 | bool hasBranchDivergence() { return false; } |
207 | |
208 | bool isSourceOfDivergence(const Value *V) { return false; } |
209 | |
210 | bool isAlwaysUniform(const Value *V) { return false; } |
211 | |
212 | unsigned getFlatAddressSpace() { |
213 | // Return an invalid address space. |
214 | return -1; |
215 | } |
216 | |
217 | bool isLegalAddImmediate(int64_t imm) { |
218 | return getTLI()->isLegalAddImmediate(imm); |
219 | } |
220 | |
221 | bool isLegalICmpImmediate(int64_t imm) { |
222 | return getTLI()->isLegalICmpImmediate(imm); |
223 | } |
224 | |
225 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
226 | bool HasBaseReg, int64_t Scale, |
227 | unsigned AddrSpace, Instruction *I = nullptr) { |
228 | TargetLoweringBase::AddrMode AM; |
229 | AM.BaseGV = BaseGV; |
230 | AM.BaseOffs = BaseOffset; |
231 | AM.HasBaseReg = HasBaseReg; |
232 | AM.Scale = Scale; |
233 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); |
234 | } |
235 | |
236 | bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, |
237 | const DataLayout &DL) const { |
238 | EVT VT = getTLI()->getValueType(DL, Ty); |
239 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); |
240 | } |
241 | |
242 | bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, |
243 | const DataLayout &DL) const { |
244 | EVT VT = getTLI()->getValueType(DL, Ty); |
245 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); |
246 | } |
247 | |
248 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { |
249 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); |
250 | } |
251 | |
252 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
253 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { |
254 | TargetLoweringBase::AddrMode AM; |
255 | AM.BaseGV = BaseGV; |
256 | AM.BaseOffs = BaseOffset; |
257 | AM.HasBaseReg = HasBaseReg; |
258 | AM.Scale = Scale; |
259 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); |
260 | } |
261 | |
262 | bool isTruncateFree(Type *Ty1, Type *Ty2) { |
263 | return getTLI()->isTruncateFree(Ty1, Ty2); |
264 | } |
265 | |
266 | bool isProfitableToHoist(Instruction *I) { |
267 | return getTLI()->isProfitableToHoist(I); |
268 | } |
269 | |
270 | bool useAA() const { return getST()->useAA(); } |
271 | |
272 | bool isTypeLegal(Type *Ty) { |
273 | EVT VT = getTLI()->getValueType(DL, Ty); |
274 | return getTLI()->isTypeLegal(VT); |
275 | } |
276 | |
277 | int getGEPCost(Type *PointeeType, const Value *Ptr, |
278 | ArrayRef<const Value *> Operands) { |
279 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); |
280 | } |
281 | |
282 | int getExtCost(const Instruction *I, const Value *Src) { |
283 | if (getTLI()->isExtFree(I)) |
284 | return TargetTransformInfo::TCC_Free; |
285 | |
286 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)) |
287 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) |
288 | if (getTLI()->isExtLoad(LI, I, DL)) |
289 | return TargetTransformInfo::TCC_Free; |
290 | |
291 | return TargetTransformInfo::TCC_Basic; |
292 | } |
293 | |
294 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
295 | ArrayRef<const Value *> Arguments, const User *U) { |
296 | return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); |
297 | } |
298 | |
299 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
300 | ArrayRef<Type *> ParamTys, const User *U) { |
301 | if (IID == Intrinsic::cttz) { |
302 | if (getTLI()->isCheapToSpeculateCttz()) |
303 | return TargetTransformInfo::TCC_Basic; |
304 | return TargetTransformInfo::TCC_Expensive; |
305 | } |
306 | |
307 | if (IID == Intrinsic::ctlz) { |
308 | if (getTLI()->isCheapToSpeculateCtlz()) |
309 | return TargetTransformInfo::TCC_Basic; |
310 | return TargetTransformInfo::TCC_Expensive; |
311 | } |
312 | |
313 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); |
314 | } |
315 | |
316 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
317 | unsigned &JumpTableSize) { |
318 | /// Try to find the estimated number of clusters. Note that the number of
319 | /// clusters identified in this function could be different from the actual
320 | /// numbers found in lowering. This function ignores switches that are
321 | /// lowered with a mix of jump table / bit test / BTree. It was initially
322 | /// intended to be used when estimating the cost of a switch in the inline
323 | /// cost heuristic, but it is a generic cost model to be used in other
324 | /// places (e.g., in loop unrolling).
325 | unsigned N = SI.getNumCases(); |
326 | const TargetLoweringBase *TLI = getTLI(); |
327 | const DataLayout &DL = this->getDataLayout(); |
328 | |
329 | JumpTableSize = 0; |
330 | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); |
331 | |
332 | // Early exit if both a jump table and bit test are not allowed. |
333 | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) |
334 | return N; |
335 | |
336 | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); |
337 | APInt MinCaseVal = MaxCaseVal; |
338 | for (auto CI : SI.cases()) { |
339 | const APInt &CaseVal = CI.getCaseValue()->getValue(); |
340 | if (CaseVal.sgt(MaxCaseVal)) |
341 | MaxCaseVal = CaseVal; |
342 | if (CaseVal.slt(MinCaseVal)) |
343 | MinCaseVal = CaseVal; |
344 | } |
345 | |
346 | // Check if suitable for a bit test |
347 | if (N <= DL.getIndexSizeInBits(0u)) { |
348 | SmallPtrSet<const BasicBlock *, 4> Dests; |
349 | for (auto I : SI.cases()) |
350 | Dests.insert(I.getCaseSuccessor()); |
351 | |
352 | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, |
353 | DL)) |
354 | return 1; |
355 | } |
356 | |
357 | // Check if suitable for a jump table. |
358 | if (IsJTAllowed) { |
359 | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) |
360 | return N; |
361 | uint64_t Range = |
362 | (MaxCaseVal - MinCaseVal) |
363 | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; |
364 | // Check whether a range of clusters is dense enough for a jump table |
365 | if (TLI->isSuitableForJumpTable(&SI, N, Range)) { |
366 | JumpTableSize = Range; |
367 | return 1; |
368 | } |
369 | } |
370 | return N; |
371 | } |
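
A worked example of the Range computation above. The case values and the 40% density threshold are purely assumptions for illustration; the real decision is made by TLI->isSuitableForJumpTable():

#include <cstdint>
#include <cstdio>

int main() {
  int64_t MinCase = 2, MaxCase = 9; // hypothetical switch cases {2,3,5,9}
  unsigned N = 4;                   // number of cases
  uint64_t Range = (uint64_t)(MaxCase - MinCase) + 1; // 8
  bool Dense = N * 100 >= Range * 40; // 400 >= 320 -> dense enough
  std::printf("Range=%llu Dense=%d\n", (unsigned long long)Range, Dense);
  return 0; // a jump table of size Range would be reported
}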
372 | |
373 | unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); } |
374 | |
375 | unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); } |
376 | |
377 | bool shouldBuildLookupTables() { |
378 | const TargetLoweringBase *TLI = getTLI(); |
379 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
380 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
381 | } |
382 | |
383 | bool haveFastSqrt(Type *Ty) { |
384 | const TargetLoweringBase *TLI = getTLI(); |
385 | EVT VT = TLI->getValueType(DL, Ty); |
386 | return TLI->isTypeLegal(VT) && |
387 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); |
388 | } |
389 | |
390 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
391 | return true; |
392 | } |
393 | |
394 | unsigned getFPOpCost(Type *Ty) { |
395 | // Check whether FADD is available, as a proxy for floating-point in |
396 | // general. |
397 | const TargetLoweringBase *TLI = getTLI(); |
398 | EVT VT = TLI->getValueType(DL, Ty); |
399 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) |
400 | return TargetTransformInfo::TCC_Basic; |
401 | return TargetTransformInfo::TCC_Expensive; |
402 | } |
403 | |
404 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { |
405 | const TargetLoweringBase *TLI = getTLI(); |
406 | switch (Opcode) { |
407 | default: break; |
408 | case Instruction::Trunc: |
409 | if (TLI->isTruncateFree(OpTy, Ty)) |
410 | return TargetTransformInfo::TCC_Free; |
411 | return TargetTransformInfo::TCC_Basic; |
412 | case Instruction::ZExt: |
413 | if (TLI->isZExtFree(OpTy, Ty)) |
414 | return TargetTransformInfo::TCC_Free; |
415 | return TargetTransformInfo::TCC_Basic; |
416 | |
417 | case Instruction::AddrSpaceCast: |
418 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), |
419 | Ty->getPointerAddressSpace())) |
420 | return TargetTransformInfo::TCC_Free; |
421 | return TargetTransformInfo::TCC_Basic; |
422 | } |
423 | |
424 | return BaseT::getOperationCost(Opcode, Ty, OpTy); |
425 | } |
426 | |
427 | unsigned getInliningThresholdMultiplier() { return 1; } |
428 | |
429 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
430 | TTI::UnrollingPreferences &UP) { |
431 | // This unrolling functionality is target independent, but to provide some |
432 | // motivation for its intended use, for x86: |
433 | |
434 | // According to the Intel 64 and IA-32 Architectures Optimization Reference |
435 | // Manual, Intel Core models and later have a loop stream detector (and |
436 | // associated uop queue) that can benefit from partial unrolling. |
437 | // The relevant requirements are: |
438 | // - The loop must have no more than 4 (8 for Nehalem and later) branches |
439 | // taken, and none of them may be calls. |
440 | // - The loop can have no more than 18 (28 for Nehalem and later) uops. |
441 | |
442 | // According to the Software Optimization Guide for AMD Family 15h |
443 | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor |
444 | // and loop buffer which can benefit from partial unrolling. |
445 | // The relevant requirements are: |
446 | // - The loop must have fewer than 16 branches |
447 | // - The loop must have less than 40 uops in all executed loop branches |
448 | |
449 | // The number of taken branches in a loop is hard to estimate here, and |
450 | // benchmarking has revealed that it is better not to be conservative when |
451 | // estimating the branch count. As a result, we'll ignore the branch limits |
452 | // until someone finds a case where it matters in practice. |
453 | |
454 | unsigned MaxOps; |
455 | const TargetSubtargetInfo *ST = getST(); |
456 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) |
457 | MaxOps = PartialUnrollingThreshold; |
458 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) |
459 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; |
460 | else |
461 | return; |
462 | |
463 | // Scan the loop: don't unroll loops with calls. |
464 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; |
465 | ++I) { |
466 | BasicBlock *BB = *I; |
467 | |
468 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) |
469 | if (isa<CallInst>(J) || isa<InvokeInst>(J)) { |
470 | ImmutableCallSite CS(&*J); |
471 | if (const Function *F = CS.getCalledFunction()) { |
472 | if (!static_cast<T *>(this)->isLoweredToCall(F)) |
473 | continue; |
474 | } |
475 | |
476 | return; |
477 | } |
478 | } |
479 | |
480 | // Enable runtime and partial unrolling up to the specified size. |
481 | // Enable using trip count upper bound to unroll loops. |
482 | UP.Partial = UP.Runtime = UP.UpperBound = true; |
483 | UP.PartialThreshold = MaxOps; |
484 | |
485 | // Avoid unrolling when optimizing for size. |
486 | UP.OptSizeThreshold = 0; |
487 | UP.PartialOptSizeThreshold = 0; |
488 | |
489 | // Set number of instructions optimized when "back edge" |
490 | // becomes "fall through" to default value of 2. |
491 | UP.BEInsns = 2; |
492 | } |
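
Restating the threshold selection above as a sketch (the function and parameter names are mine; the real code also returns from the whole routine, enabling nothing, when neither source provides a value):

unsigned pickPartialThreshold(bool CmdLineSet, unsigned CmdLineVal,
                              unsigned LoopUopBufferSize) {
  if (CmdLineSet)
    return CmdLineVal;        // -partial-unrolling-threshold wins
  return LoopUopBufferSize;   // scheduler's loop micro-op buffer otherwise
}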
493 | |
494 | int getInstructionLatency(const Instruction *I) { |
495 | if (isa<LoadInst>(I)) |
496 | return getST()->getSchedModel().DefaultLoadLatency; |
497 | |
498 | return BaseT::getInstructionLatency(I); |
499 | } |
500 | |
501 | /// @} |
502 | |
503 | /// \name Vector TTI Implementations |
504 | /// @{ |
505 | |
506 | unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } |
507 | |
508 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } |
509 | |
510 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
511 | /// are set if the result needs to be inserted and/or extracted from vectors. |
512 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { |
513 | assert(Ty->isVectorTy() && "Can only scalarize vectors");
514 | unsigned Cost = 0; |
515 | |
516 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
517 | if (Insert) |
518 | Cost += static_cast<T *>(this) |
519 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
520 | if (Extract) |
521 | Cost += static_cast<T *>(this) |
522 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
523 | } |
524 | |
525 | return Cost; |
526 | } |
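
With unit per-element costs (an assumption), fully scalarizing a <4 x i32> with both Insert and Extract set is modelled as 4 + 4 = 8 operations; as a sketch:

unsigned scalarizationCostSketch(unsigned NumElts, bool Insert, bool Extract) {
  return (Insert ? NumElts : 0) + (Extract ? NumElts : 0);
}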
527 | |
528 | /// Estimate the overhead of scalarizing an instruction's unique
529 | /// non-constant operands. The types of the arguments are ordinarily
530 | /// scalar, in which case the costs are multiplied by VF.
531 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
532 | unsigned VF) { |
533 | unsigned Cost = 0; |
534 | SmallPtrSet<const Value*, 4> UniqueOperands; |
535 | for (const Value *A : Args) { |
536 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { |
537 | Type *VecTy = nullptr; |
538 | if (A->getType()->isVectorTy()) { |
539 | VecTy = A->getType(); |
540 | // If A is a vector operand, VF should be 1 or correspond to A. |
541 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
542 | "Vector argument does not match VF");
543 | } |
544 | else |
545 | VecTy = VectorType::get(A->getType(), VF); |
546 | |
547 | Cost += getScalarizationOverhead(VecTy, false, true); |
548 | } |
549 | } |
550 | |
551 | return Cost; |
552 | } |
553 | |
554 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { |
555 | assert(VecTy->isVectorTy());
556 | |
557 | unsigned Cost = 0; |
558 | |
559 | Cost += getScalarizationOverhead(VecTy, true, false); |
560 | if (!Args.empty()) |
561 | Cost += getOperandsScalarizationOverhead(Args, |
562 | VecTy->getVectorNumElements()); |
563 | else |
564 | // When no information on arguments is provided, we add the cost |
565 | // associated with one argument as a heuristic. |
566 | Cost += getScalarizationOverhead(VecTy, false, true); |
567 | |
568 | return Cost; |
569 | } |
570 | |
571 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
572 | |
573 | unsigned getArithmeticInstrCost( |
574 | unsigned Opcode, Type *Ty, |
575 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
576 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
577 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
578 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
579 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { |
580 | // Check if any of the operands are vector operands. |
581 | const TargetLoweringBase *TLI = getTLI(); |
582 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
583 | assert(ISD && "Invalid opcode");
584 | |
585 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
586 | |
587 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
588 | // Assume that floating point arithmetic operations cost twice as much as |
589 | // integer operations. |
590 | unsigned OpCost = (IsFloat ? 2 : 1); |
591 | |
592 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
593 | // The operation is legal. Assume it costs 1. |
594 | // TODO: Once we have extract/insert subvector cost we need to use them. |
595 | return LT.first * OpCost; |
596 | } |
597 | |
598 | if (!TLI->isOperationExpand(ISD, LT.second)) { |
599 | // If the operation is custom lowered, then assume that the code is twice |
600 | // as expensive. |
601 | return LT.first * 2 * OpCost; |
602 | } |
603 | |
604 | // Else, assume that we need to scalarize this op. |
605 | // TODO: If one of the types get legalized by splitting, handle this |
606 | // similarly to what getCastInstrCost() does. |
607 | if (Ty->isVectorTy()) { |
608 | unsigned Num = Ty->getVectorNumElements(); |
609 | unsigned Cost = static_cast<T *>(this) |
610 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); |
611 | // Return the cost of multiple scalar invocation plus the cost of |
612 | // inserting and extracting the values. |
613 | return getScalarizationOverhead(Ty, Args) + Num * Cost; |
614 | } |
615 | |
616 | // We don't know anything about this scalar instruction. |
617 | return OpCost; |
618 | } |
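
A worked example of the three branches above, as comments (the numbers are assumed, not measured):

// fadd on <8 x float>, widest legal vector <4 x float>
//   => LT.first = 2 (type split once), OpCost = 2 (FP assumed 2x integer)
//   legal:  2 * 2     = 4
//   custom: 2 * 2 * 2 = 8
//   expand: 8 scalar fadds plus the insert/extract scalarization overhead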
619 | |
620 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, |
621 | Type *SubTp) { |
622 | switch (Kind) { |
623 | case TTI::SK_Broadcast: |
624 | return getBroadcastShuffleOverhead(Tp); |
625 | case TTI::SK_Select: |
626 | case TTI::SK_Reverse: |
627 | case TTI::SK_Transpose: |
628 | case TTI::SK_PermuteSingleSrc: |
629 | case TTI::SK_PermuteTwoSrc: |
630 | return getPermuteShuffleOverhead(Tp); |
631 | case TTI::SK_ExtractSubvector: |
632 | return getExtractSubvectorOverhead(Tp, Index, SubTp); |
633 | case TTI::SK_InsertSubvector: |
634 | return getInsertSubvectorOverhead(Tp, Index, SubTp); |
635 | } |
636 | llvm_unreachable("Unknown TTI::ShuffleKind");
637 | } |
638 | |
639 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
640 | const Instruction *I = nullptr) { |
641 | const TargetLoweringBase *TLI = getTLI(); |
642 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
643 | assert(ISD && "Invalid opcode");
644 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); |
645 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); |
646 | |
647 | // Check for NOOP conversions. |
648 | if (SrcLT.first == DstLT.first && |
649 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
650 | |
651 | // Bitcasts between types that are legalized to the same type are free.
652 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) |
653 | return 0; |
654 | } |
655 | |
656 | if (Opcode == Instruction::Trunc && |
657 | TLI->isTruncateFree(SrcLT.second, DstLT.second)) |
658 | return 0; |
659 | |
660 | if (Opcode == Instruction::ZExt && |
661 | TLI->isZExtFree(SrcLT.second, DstLT.second)) |
662 | return 0; |
663 | |
664 | if (Opcode == Instruction::AddrSpaceCast && |
665 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), |
666 | Dst->getPointerAddressSpace())) |
667 | return 0; |
668 | |
669 | // If this is a zext/sext of a load, return 0 if the corresponding |
670 | // extending load exists on target. |
671 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && |
672 | I && isa<LoadInst>(I->getOperand(0))) { |
673 | EVT ExtVT = EVT::getEVT(Dst); |
674 | EVT LoadVT = EVT::getEVT(Src); |
675 | unsigned LType = |
676 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); |
677 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) |
678 | return 0; |
679 | } |
680 | |
681 | // If the cast is marked as legal (or promote) then assume low cost. |
682 | if (SrcLT.first == DstLT.first && |
683 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) |
684 | return 1; |
685 | |
686 | // Handle scalar conversions. |
687 | if (!Src->isVectorTy() && !Dst->isVectorTy()) { |
688 | // Scalar bitcasts are usually free. |
689 | if (Opcode == Instruction::BitCast) |
690 | return 0; |
691 | |
692 | // Just check the op cost. If the operation is legal then assume it costs |
693 | // 1. |
694 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
695 | return 1; |
696 | |
697 | // Assume that illegal scalar instruction are expensive. |
698 | return 4; |
699 | } |
700 | |
701 | // Check vector-to-vector casts. |
702 | if (Dst->isVectorTy() && Src->isVectorTy()) { |
703 | // If the cast is between same-sized registers, then the check is simple. |
704 | if (SrcLT.first == DstLT.first && |
705 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
706 | |
707 | // Assume that Zext is done using AND. |
708 | if (Opcode == Instruction::ZExt) |
709 | return 1; |
710 | |
711 | // Assume that sext is done using SHL and SRA. |
712 | if (Opcode == Instruction::SExt) |
713 | return 2; |
714 | |
715 | // Just check the op cost. If the operation is legal then assume it
716 | // costs 1 and multiply by the type-legalization overhead.
717 |
718 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
719 | return SrcLT.first * 1; |
720 | } |
721 | |
722 | // If we are legalizing by splitting, query the concrete TTI for the cost |
723 | // of casting the original vector twice. We also need to factor in the |
724 | // cost of the split itself. Count that as 1, to be consistent with |
725 | // TLI->getTypeLegalizationCost(). |
726 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == |
727 | TargetLowering::TypeSplitVector) || |
728 | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == |
729 | TargetLowering::TypeSplitVector)) { |
730 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), |
731 | Dst->getVectorNumElements() / 2); |
732 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), |
733 | Src->getVectorNumElements() / 2); |
734 | T *TTI = static_cast<T *>(this); |
735 | return TTI->getVectorSplitCost() + |
736 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); |
737 | } |
738 | |
739 | // In other cases where the source or destination are illegal, assume |
740 | // the operation will get scalarized. |
741 | unsigned Num = Dst->getVectorNumElements(); |
742 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( |
743 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); |
744 | |
745 | // Return the cost of multiple scalar invocation plus the cost of |
746 | // inserting and extracting the values. |
747 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; |
748 | } |
749 | |
750 | // We already handled vector-to-vector and scalar-to-scalar
751 | // conversions. This is where we handle bitcasts between vectors and
752 | // scalars. We need to assume that the conversion is scalarized in one
753 | // way or another.
754 | if (Opcode == Instruction::BitCast) |
755 | // Illegal bitcasts are done by storing and loading from a stack slot. |
756 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) |
757 | : 0) + |
758 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) |
759 | : 0); |
760 | |
761 | llvm_unreachable("Unhandled cast");
762 | } |
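
Two quick consequences of the branches above, under the per-branch assumptions they state, as comments:

// same-size legal registers: zext -> 1 (one AND), sext -> 2 (SHL + SRA)
// split legalization: getVectorSplitCost() + 2 * cost(half-width cast)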
763 | |
764 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
765 | VectorType *VecTy, unsigned Index) { |
766 | return static_cast<T *>(this)->getVectorInstrCost( |
767 | Instruction::ExtractElement, VecTy, Index) + |
768 | static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, |
769 | VecTy->getElementType()); |
770 | } |
771 | |
772 | unsigned getCFInstrCost(unsigned Opcode) { |
773 | // Branches are assumed to be predicted. |
774 | return 0; |
775 | } |
776 | |
777 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
778 | const Instruction *I) { |
779 | const TargetLoweringBase *TLI = getTLI(); |
780 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
781 | assert(ISD && "Invalid opcode");
782 | |
783 | // Selects on vectors are actually vector selects. |
784 | if (ISD == ISD::SELECT) { |
785 | assert(CondTy && "CondTy must exist");
786 | if (CondTy->isVectorTy()) |
787 | ISD = ISD::VSELECT; |
788 | } |
789 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
790 | |
791 | if (!(ValTy->isVectorTy() && !LT.second.isVector()) && |
792 | !TLI->isOperationExpand(ISD, LT.second)) { |
793 | // The operation is legal. Assume it costs 1. Multiply |
794 | // by the type-legalization overhead. |
795 | return LT.first * 1; |
796 | } |
797 | |
798 | // Otherwise, assume that the cast is scalarized. |
799 | // TODO: If one of the types get legalized by splitting, handle this |
800 | // similarly to what getCastInstrCost() does. |
801 | if (ValTy->isVectorTy()) { |
802 | unsigned Num = ValTy->getVectorNumElements(); |
803 | if (CondTy) |
804 | CondTy = CondTy->getScalarType(); |
805 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( |
806 | Opcode, ValTy->getScalarType(), CondTy, I); |
807 | |
808 | // Return the cost of multiple scalar invocation plus the cost of |
809 | // inserting and extracting the values. |
810 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; |
811 | } |
812 | |
813 | // Unknown scalar opcode. |
814 | return 1; |
815 | } |
816 | |
817 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { |
818 | std::pair<unsigned, MVT> LT = |
819 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); |
820 | |
821 | return LT.first; |
822 | } |
823 | |
824 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
825 | unsigned AddressSpace, const Instruction *I = nullptr) { |
826 | assert(!Src->isVoidTy() && "Invalid type");
827 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); |
828 | |
829 | // Assume that all loads of legal types cost 1.
830 | unsigned Cost = LT.first; |
831 | |
832 | if (Src->isVectorTy() && |
833 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { |
834 | // This is a vector load that legalizes to a larger type than the vector |
835 | // itself. If the corresponding extending load or truncating store is
836 | // not legal or custom, this will scalarize.
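 | // E.g. a <4 x i8> load might legalize to v4i32; unless the target
 | // supports an extending load from v4i8, the cost below also pays for
 | // rebuilding the vector element by element.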
837 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; |
838 | EVT MemVT = getTLI()->getValueType(DL, Src); |
839 | if (Opcode == Instruction::Store) |
840 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); |
841 | else |
842 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); |
843 | |
844 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { |
845 | // This is a vector load/store for some illegal type that is scalarized. |
846 | // We must account for the cost of building or decomposing the vector. |
847 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, |
848 | Opcode == Instruction::Store); |
849 | } |
850 | } |
851 | |
852 | return Cost; |
853 | } |
854 | |
855 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, |
856 | unsigned Factor, |
857 | ArrayRef<unsigned> Indices, |
858 | unsigned Alignment, unsigned AddressSpace, |
859 | bool UseMaskForCond = false, |
860 | bool UseMaskForGaps = false) { |
861 | VectorType *VT = dyn_cast<VectorType>(VecTy); |
862 | assert(VT && "Expect a vector type for interleaved memory op");
863 | |
864 | unsigned NumElts = VT->getNumElements(); |
865 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
866 | |
867 | unsigned NumSubElts = NumElts / Factor; |
868 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); |
869 | |
870 | // First, the cost of the load/store operation itself.
871 | unsigned Cost; |
872 | if (UseMaskForCond || UseMaskForGaps) |
873 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( |
874 | Opcode, VecTy, Alignment, AddressSpace); |
875 | else |
876 | Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, |
877 | AddressSpace); |
878 | |
879 | // Legalize the vector type, and get the legalized and unlegalized type |
880 | // sizes. |
881 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
882 | unsigned VecTySize = |
883 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); |
884 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); |
885 | |
886 | // Return the ceiling of dividing A by B. |
887 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; |
888 | |
889 | // Scale the cost of the memory operation by the fraction of legalized |
890 | // instructions that will actually be used. We shouldn't account for the |
891 | // cost of dead instructions since they will be removed. |
892 | // |
893 | // E.g., An interleaved load of factor 8: |
894 | // %vec = load <16 x i64>, <16 x i64>* %ptr |
895 | // %v0 = shufflevector %vec, undef, <0, 8> |
896 | // |
897 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be |
898 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized |
899 | // type). The other loads are unused. |
900 | // |
901 | // We only scale the cost of loads since interleaved store groups aren't |
902 | // allowed to have gaps. |
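 | // In the example above NumLegalInsts is 8 and only legal loads 0 and 4
 | // are used, so the memory cost below is scaled by 2/8 (rounded up).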
903 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { |
904 | // The number of loads of a legal type it will take to represent a load |
905 | // of the unlegalized vector type. |
906 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); |
907 | |
908 | // The number of elements of the unlegalized type that correspond to a |
909 | // single legal instruction. |
910 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); |
911 | |
912 | // Determine which legal instructions will be used. |
913 | BitVector UsedInsts(NumLegalInsts, false); |
914 | for (unsigned Index : Indices) |
915 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) |
916 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); |
917 | |
918 | // Scale the cost of the load by the fraction of legal instructions
919 | // that will be used, rounding up so the cost never truncates to zero.
920 | Cost = ceil(UsedInsts.count() * Cost, NumLegalInsts);
921 | } |
922 | |
923 | // Next, add the cost of the interleave operation itself.
924 | if (Opcode == Instruction::Load) { |
925 | // The interleave cost is modeled as extracting each sub vector's
926 | // elements from the wide vector and inserting them into the sub vectors.
927 | // |
928 | // E.g. An interleaved load of factor 2 (with one member of index 0): |
929 | // %vec = load <8 x i32>, <8 x i32>* %ptr |
930 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 |
931 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the |
932 | // <8 x i32> vector and insert them into a <4 x i32> vector. |
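 | // With unit insert/extract costs that is 4 extracts plus 4 inserts for
 | // the single member, on top of the memory cost computed above.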
933 | |
934 | assert(Indices.size() <= Factor &&
935 |        "Interleaved memory op has too many members");
936 | |
937 | for (unsigned Index : Indices) { |
938 | assert(Index < Factor && "Invalid index for interleaved memory op");
939 | |
940 | // Extract elements from loaded vector for each sub vector. |
941 | for (unsigned i = 0; i < NumSubElts; i++) |
942 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
943 | Instruction::ExtractElement, VT, Index + i * Factor); |
944 | } |
945 | |
946 | unsigned InsSubCost = 0; |
947 | for (unsigned i = 0; i < NumSubElts; i++) |
948 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( |
949 | Instruction::InsertElement, SubVT, i); |
950 | |
951 | Cost += Indices.size() * InsSubCost; |
952 | } else { |
953 | // The interleave cost is modeled as extracting all elements from the
954 | // sub vectors and inserting them into the wide vector.
955 | // |
956 | // E.g. An interleaved store of factor 2: |
957 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> |
958 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr |
959 | // The cost is estimated as extract all elements from both <4 x i32> |
960 | // vectors and insert into the <8 x i32> vector. |
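 | // With unit insert/extract costs that is 2 * 4 extracts plus 8 inserts.
 | // Stores are costed for all Factor members because interleaved store
 | // groups are not allowed to have gaps.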
961 | |
962 | unsigned ExtSubCost = 0; |
963 | for (unsigned i = 0; i < NumSubElts; i++) |
964 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( |
965 | Instruction::ExtractElement, SubVT, i); |
966 | Cost += ExtSubCost * Factor; |
967 | |
968 | for (unsigned i = 0; i < NumElts; i++) |
969 | Cost += static_cast<T *>(this) |
970 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); |
971 | } |
972 | |
973 | if (!UseMaskForCond) |
974 | return Cost; |
975 | |
976 | Type *I8Type = Type::getInt8Ty(VT->getContext()); |
977 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); |
978 | SubVT = VectorType::get(I8Type, NumSubElts); |
979 | |
980 | // The mask shuffling cost is modeled as extracting all the elements of
981 | // the mask and inserting each of them Factor times into the wide vector:
982 | // |
983 | // E.g. an interleaved group with factor 3: |
984 | // %mask = icmp ult <8 x i32> %vec1, %vec2 |
985 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, |
986 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> |
987 | // The cost is estimated as extract all mask elements from the <8xi1> mask |
988 | // vector and insert them factor times into the <24xi1> shuffled mask |
989 | // vector. |
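 | // For the factor-3 example that is 8 extracts from the <8 x i1> mask
 | // plus 24 inserts into the <24 x i1> interleaved mask, assuming unit
 | // insert/extract costs.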
990 | for (unsigned i = 0; i < NumSubElts; i++) |
991 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
992 | Instruction::ExtractElement, SubVT, i); |
993 | |
994 | for (unsigned i = 0; i < NumElts; i++) |
995 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
996 | Instruction::InsertElement, MaskVT, i); |
997 | |
998 | // The Gaps mask is invariant and created outside the loop, therefore the |
999 | // cost of creating it is not accounted for here. However if we have both |
1000 | // a MaskForGaps and some other mask that guards the execution of the |
1001 | // memory access, we need to account for the cost of And-ing the two masks |
1002 | // inside the loop. |
1003 | if (UseMaskForGaps) |
1004 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( |
1005 | BinaryOperator::And, MaskVT); |
1006 | |
1007 | return Cost; |
1008 | } |
1009 | |
1010 | /// Get intrinsic cost based on arguments. |
1011 | unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, |
1012 | ArrayRef<Value *> Args, FastMathFlags FMF, |
1013 | unsigned VF = 1) { |
1014 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); |
1015 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1016 | auto *ConcreteTTI = static_cast<T *>(this); |
1017 | |
1018 | switch (IID) { |
1019 | default: { |
1020 | // Assume that we need to scalarize this intrinsic. |
1021 | SmallVector<Type *, 4> Types; |
1022 | for (Value *Op : Args) { |
1023 | Type *OpTy = Op->getType(); |
1024 | assert(VF == 1 || !OpTy->isVectorTy());
1025 | Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); |
1026 | } |
1027 | |
1028 | if (VF > 1 && !RetTy->isVoidTy()) |
1029 | RetTy = VectorType::get(RetTy, VF); |
1030 | |
1031 | // Compute the scalarization overhead based on Args for a vector |
1032 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while |
1033 | // CostModel will pass a vector RetTy and VF is 1. |
1034 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); |
1035 | if (RetVF > 1 || VF > 1) { |
1036 | ScalarizationCost = 0; |
1037 | if (!RetTy->isVoidTy()) |
1038 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); |
1039 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); |
1040 | } |
1041 | |
1042 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, |
1043 | ScalarizationCost); |
1044 | } |
1045 | case Intrinsic::masked_scatter: { |
1046 | assert(VF == 1 && "Can't vectorize types here.");
1047 | Value *Mask = Args[3]; |
1048 | bool VarMask = !isa<Constant>(Mask); |
1049 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); |
1050 | return ConcreteTTI->getGatherScatterOpCost( |
1051 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); |
1052 | } |
1053 | case Intrinsic::masked_gather: { |
1054 | assert(VF == 1 && "Can't vectorize types here.");
1055 | Value *Mask = Args[2]; |
1056 | bool VarMask = !isa<Constant>(Mask); |
1057 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); |
1058 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, |
1059 | Args[0], VarMask, Alignment); |
1060 | } |
1061 | case Intrinsic::experimental_vector_reduce_add: |
1062 | case Intrinsic::experimental_vector_reduce_mul: |
1063 | case Intrinsic::experimental_vector_reduce_and: |
1064 | case Intrinsic::experimental_vector_reduce_or: |
1065 | case Intrinsic::experimental_vector_reduce_xor: |
1066 | case Intrinsic::experimental_vector_reduce_fadd: |
1067 | case Intrinsic::experimental_vector_reduce_fmul: |
1068 | case Intrinsic::experimental_vector_reduce_smax: |
1069 | case Intrinsic::experimental_vector_reduce_smin: |
1070 | case Intrinsic::experimental_vector_reduce_fmax: |
1071 | case Intrinsic::experimental_vector_reduce_fmin: |
1072 | case Intrinsic::experimental_vector_reduce_umax: |
1073 | case Intrinsic::experimental_vector_reduce_umin: |
1074 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); |
1075 | case Intrinsic::fshl: |
1076 | case Intrinsic::fshr: { |
1077 | Value *X = Args[0]; |
1078 | Value *Y = Args[1]; |
1079 | Value *Z = Args[2]; |
1080 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; |
1081 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); |
1082 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); |
1083 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); |
1084 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; |
1085 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 |
1086 | : TTI::OP_None; |
1087 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
1088 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
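 | // The expansion is costed piecewise below: an or, a sub and the two
 | // shifts, plus a urem when the shift amount is not constant and, for
 | // non-rotates, a compare and select to guard the shift-by-zero case.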
1089 | unsigned Cost = 0; |
1090 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); |
1091 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); |
1092 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, |
1093 | OpKindX, OpKindZ, OpPropsX); |
1094 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, |
1095 | OpKindY, OpKindZ, OpPropsY); |
1096 | // Non-constant shift amounts require a modulo.
1097 | if (OpKindZ != TTI::OK_UniformConstantValue && |
1098 | OpKindZ != TTI::OK_NonUniformConstantValue) |
1099 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, |
1100 | OpKindZ, OpKindBW, OpPropsZ, |
1101 | OpPropsBW); |
1102 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. |
1103 | if (X != Y) { |
1104 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1105 | if (RetVF > 1) |
1106 | CondTy = VectorType::get(CondTy, RetVF); |
1107 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1108 | CondTy, nullptr); |
1109 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1110 | CondTy, nullptr); |
1111 | } |
1112 | return Cost; |
1113 | } |
1114 | } |
1115 | } |
1116 | |
1117 | /// Get intrinsic cost based on argument types. |
1118 | /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the |
1119 | /// cost of scalarizing the arguments and the return value will be computed |
1120 | /// based on types. |
1121 | unsigned getIntrinsicInstrCost( |
1122 | Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, |
1123 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { |
1124 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); |
1125 | auto *ConcreteTTI = static_cast<T *>(this); |
1126 | |
1127 | SmallVector<unsigned, 2> ISDs; |
1128 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. |
1129 | switch (IID) { |
1130 | default: { |
1131 | // Assume that we need to scalarize this intrinsic. |
1132 | unsigned ScalarizationCost = ScalarizationCostPassed; |
1133 | unsigned ScalarCalls = 1; |
1134 | Type *ScalarRetTy = RetTy; |
1135 | if (RetTy->isVectorTy()) { |
1136 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1137 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); |
1138 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); |
1139 | ScalarRetTy = RetTy->getScalarType(); |
1140 | } |
1141 | SmallVector<Type *, 4> ScalarTys; |
1142 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1143 | Type *Ty = Tys[i]; |
1144 | if (Ty->isVectorTy()) { |
1145 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1146 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); |
1147 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); |
1148 | Ty = Ty->getScalarType(); |
1149 | } |
1150 | ScalarTys.push_back(Ty); |
1151 | } |
1152 | if (ScalarCalls == 1) |
1153 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. |
1154 | |
1155 | unsigned ScalarCost = |
1156 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); |
1157 | |
1158 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1159 | } |
1160 | // Look for intrinsics that can be lowered directly or turned into a scalar |
1161 | // intrinsic call. |
1162 | case Intrinsic::sqrt: |
1163 | ISDs.push_back(ISD::FSQRT); |
1164 | break; |
1165 | case Intrinsic::sin: |
1166 | ISDs.push_back(ISD::FSIN); |
1167 | break; |
1168 | case Intrinsic::cos: |
1169 | ISDs.push_back(ISD::FCOS); |
1170 | break; |
1171 | case Intrinsic::exp: |
1172 | ISDs.push_back(ISD::FEXP); |
1173 | break; |
1174 | case Intrinsic::exp2: |
1175 | ISDs.push_back(ISD::FEXP2); |
1176 | break; |
1177 | case Intrinsic::log: |
1178 | ISDs.push_back(ISD::FLOG); |
1179 | break; |
1180 | case Intrinsic::log10: |
1181 | ISDs.push_back(ISD::FLOG10); |
1182 | break; |
1183 | case Intrinsic::log2: |
1184 | ISDs.push_back(ISD::FLOG2); |
1185 | break; |
1186 | case Intrinsic::fabs: |
1187 | ISDs.push_back(ISD::FABS); |
1188 | break; |
1189 | case Intrinsic::canonicalize: |
1190 | ISDs.push_back(ISD::FCANONICALIZE); |
1191 | break; |
1192 | case Intrinsic::minnum: |
1193 | ISDs.push_back(ISD::FMINNUM); |
1194 | if (FMF.noNaNs()) |
1195 | ISDs.push_back(ISD::FMINIMUM); |
1196 | break; |
1197 | case Intrinsic::maxnum: |
1198 | ISDs.push_back(ISD::FMAXNUM); |
1199 | if (FMF.noNaNs()) |
1200 | ISDs.push_back(ISD::FMAXIMUM); |
1201 | break; |
1202 | case Intrinsic::copysign: |
1203 | ISDs.push_back(ISD::FCOPYSIGN); |
1204 | break; |
1205 | case Intrinsic::floor: |
1206 | ISDs.push_back(ISD::FFLOOR); |
1207 | break; |
1208 | case Intrinsic::ceil: |
1209 | ISDs.push_back(ISD::FCEIL); |
1210 | break; |
1211 | case Intrinsic::trunc: |
1212 | ISDs.push_back(ISD::FTRUNC); |
1213 | break; |
1214 | case Intrinsic::nearbyint: |
1215 | ISDs.push_back(ISD::FNEARBYINT); |
1216 | break; |
1217 | case Intrinsic::rint: |
1218 | ISDs.push_back(ISD::FRINT); |
1219 | break; |
1220 | case Intrinsic::round: |
1221 | ISDs.push_back(ISD::FROUND); |
1222 | break; |
1223 | case Intrinsic::pow: |
1224 | ISDs.push_back(ISD::FPOW); |
1225 | break; |
1226 | case Intrinsic::fma: |
1227 | ISDs.push_back(ISD::FMA); |
1228 | break; |
1229 | case Intrinsic::fmuladd: |
1230 | ISDs.push_back(ISD::FMA); |
1231 | break; |
1232 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. |
1233 | case Intrinsic::lifetime_start: |
1234 | case Intrinsic::lifetime_end: |
1235 | case Intrinsic::sideeffect: |
1236 | return 0; |
1237 | case Intrinsic::masked_store: |
1238 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, |
1239 | 0); |
1240 | case Intrinsic::masked_load: |
1241 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); |
1242 | case Intrinsic::experimental_vector_reduce_add: |
1243 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], |
1244 | /*IsPairwiseForm=*/false); |
1245 | case Intrinsic::experimental_vector_reduce_mul: |
1246 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], |
1247 | /*IsPairwiseForm=*/false); |
1248 | case Intrinsic::experimental_vector_reduce_and: |
1249 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], |
1250 | /*IsPairwiseForm=*/false); |
1251 | case Intrinsic::experimental_vector_reduce_or: |
1252 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], |
1253 | /*IsPairwiseForm=*/false); |
1254 | case Intrinsic::experimental_vector_reduce_xor: |
1255 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], |
1256 | /*IsPairwiseForm=*/false); |
1257 | case Intrinsic::experimental_vector_reduce_fadd: |
1258 | return ConcreteTTI->getArithmeticReductionCost(Instruction::FAdd, Tys[0], |
1259 | /*IsPairwiseForm=*/false); |
1260 | case Intrinsic::experimental_vector_reduce_fmul: |
1261 | return ConcreteTTI->getArithmeticReductionCost(Instruction::FMul, Tys[0], |
1262 | /*IsPairwiseForm=*/false); |
1263 | case Intrinsic::experimental_vector_reduce_smax: |
1264 | case Intrinsic::experimental_vector_reduce_smin: |
1265 | case Intrinsic::experimental_vector_reduce_fmax: |
1266 | case Intrinsic::experimental_vector_reduce_fmin: |
1267 | return ConcreteTTI->getMinMaxReductionCost( |
1268 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1269 | /*IsSigned=*/true); |
1270 | case Intrinsic::experimental_vector_reduce_umax: |
1271 | case Intrinsic::experimental_vector_reduce_umin: |
1272 | return ConcreteTTI->getMinMaxReductionCost( |
1273 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1274 | /*IsSigned=*/false); |
1275 | case Intrinsic::sadd_sat: |
1276 | case Intrinsic::ssub_sat: { |
1277 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1278 | if (RetVF > 1) |
1279 | CondTy = VectorType::get(CondTy, RetVF); |
1280 | |
1281 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1282 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat |
1283 | ? Intrinsic::sadd_with_overflow |
1284 | : Intrinsic::ssub_with_overflow; |
1285 | |
1286 | // SatMax -> Overflow && SumDiff < 0 |
1287 | // SatMin -> Overflow && SumDiff >= 0 |
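 | // So the saturating form is costed as the matching overflow intrinsic
 | // plus one compare and two selects (picking SatMax vs. SatMin, then
 | // picking the saturated or the plain result).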
1288 | unsigned Cost = 0; |
1289 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1290 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1291 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1292 | CondTy, nullptr); |
1293 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1294 | CondTy, nullptr); |
1295 | return Cost; |
1296 | } |
1297 | case Intrinsic::uadd_sat: |
1298 | case Intrinsic::usub_sat: { |
1299 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1300 | if (RetVF > 1) |
1301 | CondTy = VectorType::get(CondTy, RetVF); |
1302 | |
1303 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1304 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat |
1305 | ? Intrinsic::uadd_with_overflow |
1306 | : Intrinsic::usub_with_overflow; |
1307 | |
1308 | unsigned Cost = 0; |
1309 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1310 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1311 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1312 | CondTy, nullptr); |
1313 | return Cost; |
1314 | } |
1315 | case Intrinsic::smul_fix: |
1316 | case Intrinsic::umul_fix: { |
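 | // Fixed-point multiplication is modeled as widening both operands,
 | // performing a double-width multiply, and recombining the shifted
 | // halves: two extends, a mul, two truncs, two shifts and an or (a
 | // rough sketch of the generic lowering; targets may do better).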
1317 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; |
1318 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); |
1319 | if (RetVF > 1) |
1320 | ExtTy = VectorType::get(ExtTy, RetVF); |
1321 | |
1322 | unsigned ExtOp = |
1323 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1324 | |
1325 | unsigned Cost = 0; |
1326 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); |
1327 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1328 | Cost += |
1329 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); |
1330 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, |
1331 | TTI::OK_AnyValue, |
1332 | TTI::OK_UniformConstantValue); |
1333 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, |
1334 | TTI::OK_AnyValue, |
1335 | TTI::OK_UniformConstantValue); |
1336 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); |
1337 | return Cost; |
1338 | } |
1339 | case Intrinsic::sadd_with_overflow: |
1340 | case Intrinsic::ssub_with_overflow: { |
1341 | Type *SumTy = RetTy->getContainedType(0); |
1342 | Type *OverflowTy = RetTy->getContainedType(1); |
1343 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow |
1344 | ? BinaryOperator::Add |
1345 | : BinaryOperator::Sub; |
1346 | |
1347 | // LHSSign -> LHS >= 0 |
1348 | // RHSSign -> RHS >= 0 |
1349 | // SumSign -> Sum >= 0 |
1350 | // |
1351 | // Add: |
1352 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) |
1353 | // Sub: |
1354 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) |
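 | // That check is costed below as the add/sub itself, three sign
 | // compares against zero, two compares of the resulting sign bits,
 | // and an and to combine them.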
1355 | unsigned Cost = 0; |
1356 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1357 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1358 | OverflowTy, nullptr); |
1359 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( |
1360 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); |
1361 | Cost += |
1362 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); |
1363 | return Cost; |
1364 | } |
1365 | case Intrinsic::uadd_with_overflow: |
1366 | case Intrinsic::usub_with_overflow: { |
1367 | Type *SumTy = RetTy->getContainedType(0); |
1368 | Type *OverflowTy = RetTy->getContainedType(1); |
1369 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow |
1370 | ? BinaryOperator::Add |
1371 | : BinaryOperator::Sub; |
1372 | |
1373 | unsigned Cost = 0; |
1374 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1375 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1376 | OverflowTy, nullptr); |
1377 | return Cost; |
1378 | } |
1379 | case Intrinsic::smul_with_overflow: |
1380 | case Intrinsic::umul_with_overflow: { |
1381 | Type *MulTy = RetTy->getContainedType(0); |
1382 | Type *OverflowTy = RetTy->getContainedType(1); |
1383 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; |
1384 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); |
1385 | if (MulTy->isVectorTy()) |
1386 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
1387 | |
1388 | unsigned ExtOp = IID == Intrinsic::smul_with_overflow
1389 |                      ? Instruction::SExt : Instruction::ZExt;
1390 | |
1391 | unsigned Cost = 0; |
1392 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); |
1393 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1394 | Cost += |
1395 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); |
1396 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, |
1397 | TTI::OK_AnyValue, |
1398 | TTI::OK_UniformConstantValue); |
1399 | |
1400 | if (IID == Intrinsic::smul_with_overflow) |
1401 | Cost += ConcreteTTI->getArithmeticInstrCost( |
1402 | Instruction::AShr, MulTy, TTI::OK_AnyValue, |
1403 | TTI::OK_UniformConstantValue); |
1404 | |
1405 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, |
1406 | OverflowTy, nullptr); |
1407 | return Cost; |
1408 | } |
1409 | case Intrinsic::ctpop: |
1410 | ISDs.push_back(ISD::CTPOP); |
1411 | // In case of legalization use TCC_Expensive. This is cheaper than a |
1412 | // library call but still not a cheap instruction. |
1413 | SingleCallCost = TargetTransformInfo::TCC_Expensive; |
1414 | break; |
1415 | // FIXME: ctlz, cttz, ... |
1416 | } |
1417 | |
1418 | const TargetLoweringBase *TLI = getTLI(); |
1419 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); |
1420 | |
1421 | SmallVector<unsigned, 2> LegalCost; |
1422 | SmallVector<unsigned, 2> CustomCost; |
1423 | for (unsigned ISD : ISDs) { |
1424 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
1425 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && |
1426 | TLI->isFAbsFree(LT.second)) { |
1427 | return 0; |
1428 | } |
1429 | |
1430 | // The operation is legal. Assume it costs 1. |
1431 | // If the type is split to multiple registers, assume that there is some |
1432 | // overhead to this. |
1433 | // TODO: Once we have extract/insert subvector cost we need to use them. |
1434 | if (LT.first > 1) |
1435 | LegalCost.push_back(LT.first * 2); |
1436 | else |
1437 | LegalCost.push_back(LT.first * 1); |
1438 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { |
1439 | // If the operation is custom lowered then assume |
1440 | // that the code is twice as expensive. |
1441 | CustomCost.push_back(LT.first * 2); |
1442 | } |
1443 | } |
1444 | |
1445 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); |
1446 | if (MinLegalCostI != LegalCost.end()) |
1447 | return *MinLegalCostI; |
1448 | |
1449 | auto MinCustomCostI = |
1450 | std::min_element(CustomCost.begin(), CustomCost.end()); |
1451 | if (MinCustomCostI != CustomCost.end()) |
1452 | return *MinCustomCostI; |
1453 | |
1454 | // If we can't lower fmuladd into an FMA, estimate the cost as a
1455 | // floating-point mul followed by an add.
1456 | if (IID == Intrinsic::fmuladd) |
1457 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + |
1458 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); |
1459 | |
1460 | // Else, assume that we need to scalarize this intrinsic. For math builtins |
1461 | // this will emit a costly libcall, adding call overhead and spills. Make it |
1462 | // very expensive. |
1463 | if (RetTy->isVectorTy()) { |
1464 | unsigned ScalarizationCost = |
1465 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) |
1466 | ? ScalarizationCostPassed |
1467 | : getScalarizationOverhead(RetTy, true, false)); |
1468 | unsigned ScalarCalls = RetTy->getVectorNumElements(); |
1469 | SmallVector<Type *, 4> ScalarTys; |
1470 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1471 | Type *Ty = Tys[i]; |
1472 | if (Ty->isVectorTy()) |
1473 | Ty = Ty->getScalarType(); |
1474 | ScalarTys.push_back(Ty); |
1475 | } |
1476 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( |
1477 | IID, RetTy->getScalarType(), ScalarTys, FMF); |
1478 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1479 | if (Tys[i]->isVectorTy()) { |
1480 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1481 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); |
1482 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); |
1483 | } |
1484 | } |
1485 | |
1486 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1487 | } |
1488 | |
1489 | // This is going to be turned into a library call, make it expensive. |
1490 | return SingleCallCost; |
1491 | } |
1492 | |
1493 | /// Compute a cost of the given call instruction. |
1494 | /// |
1495 | /// Compute the cost of calling function F with return type RetTy and |
1496 | /// argument types Tys. F might be nullptr, in this case the cost of an |
1497 | /// arbitrary call with the specified signature will be returned. |
1498 | /// This is used, for instance, when we estimate call of a vector |
1499 | /// counterpart of the given function. |
1500 | /// \param F Called function, might be nullptr. |
1501 | /// \param RetTy Return value types. |
1502 | /// \param Tys Argument types. |
1503 | /// \returns The cost of Call instruction. |
1504 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { |
1505 | return 10; |
1506 | } |
1507 | |
1508 | unsigned getNumberOfParts(Type *Tp) { |
1509 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); |
1510 | return LT.first; |
1511 | } |
1512 | |
1513 | unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, |
1514 | const SCEV *) { |
1515 | return 0; |
1516 | } |
1517 | |
1518 | /// Try to calculate arithmetic and shuffle op costs for reduction operations. |
1519 | /// We assume that reduction operations are performed in the following way:
1520 | /// 1. Non-pairwise reduction |
1521 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1522 | /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef> |
1523 | /// \----------------v-------------/ \----------v------------/ |
1524 | /// n/2 elements n/2 elements |
1525 | /// %red1 = op <n x t> %val, <n x t> val1 |
1526 | /// After this operation we have a vector %red1 where only the first n/2 |
1527 | /// elements are meaningful, the second n/2 elements are undefined and can be |
1528 | /// dropped. All other operations are actually working with the vector of |
1529 | /// length n/2, not n, though the real vector length is still n. |
1530 | /// %val2 = shufflevector<n x t> %red1, <n x t> %undef, |
1531 | /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef> |
1532 | /// \----------------v-------------/ \----------v------------/ |
1533 | /// n/4 elements 3*n/4 elements |
1534 | /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of |
1535 | /// length n/2, the resulting vector has length n/4 etc. |
1536 | /// 2. Pairwise reduction: |
1537 | /// Everything is the same except for an additional shuffle operation which |
1538 | /// is used to produce operands for pairwise kind of reductions. |
1539 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1540 | /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef> |
1541 | /// \-------------v----------/ \----------v------------/ |
1542 | /// n/2 elements n/2 elements |
1543 | /// %val2 = shufflevector<n x t> %val, <n x t> %undef, |
1544 | /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef> |
1545 | /// \-------------v----------/ \----------v------------/ |
1546 | /// n/2 elements n/2 elements |
1547 | /// %red1 = op <n x t> %val1, <n x t> val2 |
1548 | /// Again, the operation is performed on <n x t> vector, but the resulting |
1549 | /// vector %red1 is <n/2 x t> vector. |
1550 | /// |
1551 | /// The cost model should take into account that the actual length of the |
1552 | /// vector is reduced on each iteration. |
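 | ///
 | /// For example, reducing <8 x float> on a target whose widest legal
 | /// vector is <4 x float> is modeled as one extract-subvector shuffle
 | /// plus one <4 x float> op to reach the legal width, then two more
 | /// shuffle+op levels, and a final extractelement (a sketch assuming
 | /// the non-pairwise form and unit shuffle/op costs).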
1553 | unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1554 | bool IsPairwise) { |
1555 | assert(Ty->isVectorTy() && "Expect a vector type");
1556 | Type *ScalarTy = Ty->getVectorElementType(); |
1557 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1558 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1559 | unsigned ArithCost = 0; |
1560 | unsigned ShuffleCost = 0; |
1561 | auto *ConcreteTTI = static_cast<T *>(this); |
1562 | std::pair<unsigned, MVT> LT = |
1563 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1564 | unsigned LongVectorCount = 0; |
1565 | unsigned MVTLen = |
1566 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1567 | while (NumVecElts > MVTLen) { |
1568 | NumVecElts /= 2; |
1569 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1570 | // Assume the pairwise shuffles add a cost. |
1571 | ShuffleCost += (IsPairwise + 1) * |
1572 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1573 | NumVecElts, SubTy); |
1574 | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); |
1575 | Ty = SubTy; |
1576 | ++LongVectorCount; |
1577 | } |
1578 | |
1579 | NumReduxLevels -= LongVectorCount; |
1580 | |
1581 | // The minimal length of the vector is limited by the real length of vector |
1582 | // operations performed on the current platform. That's why several final |
1583 | // reduction operations are performed on the vectors with the same |
1584 | // architecture-dependent length. |
1585 | |
1586 | // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1587 | // reductions need two shuffles on every level but the last; on that level
1588 | // one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
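 | // E.g. with two remaining reduction levels a pairwise reduction needs
 | // three shuffles while a non-pairwise one needs two.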
1589 | unsigned NumShuffles = NumReduxLevels; |
1590 | if (IsPairwise && NumReduxLevels >= 1) |
1591 | NumShuffles += NumReduxLevels - 1; |
1592 | ShuffleCost += NumShuffles * |
1593 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1594 | 0, Ty); |
1595 | ArithCost += NumReduxLevels * |
1596 | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); |
1597 | return ShuffleCost + ArithCost + |
1598 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1599 | } |
1600 | |
1601 | /// Try to calculate op costs for min/max reduction operations. |
1602 | /// \param CondTy Conditional type for the Select instruction. |
1603 | unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, |
1604 | bool) { |
1605 | assert(Ty->isVectorTy() && "Expect a vector type");
1606 | Type *ScalarTy = Ty->getVectorElementType(); |
1607 | Type *ScalarCondTy = CondTy->getVectorElementType(); |
1608 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1609 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1610 | unsigned CmpOpcode; |
1611 | if (Ty->isFPOrFPVectorTy()) { |
1612 | CmpOpcode = Instruction::FCmp; |
1613 | } else { |
1614 | assert(Ty->isIntOrIntVectorTy() &&
1615 |        "expecting floating point or integer type for min/max reduction");
1616 | CmpOpcode = Instruction::ICmp; |
1617 | } |
1618 | unsigned MinMaxCost = 0; |
1619 | unsigned ShuffleCost = 0; |
1620 | auto *ConcreteTTI = static_cast<T *>(this); |
1621 | std::pair<unsigned, MVT> LT = |
1622 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1623 | unsigned LongVectorCount = 0; |
1624 | unsigned MVTLen = |
1625 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1626 | while (NumVecElts > MVTLen) { |
1627 | NumVecElts /= 2; |
1628 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1629 | CondTy = VectorType::get(ScalarCondTy, NumVecElts); |
1630 | |
1631 | // Assume the pairwise shuffles add a cost. |
1632 | ShuffleCost += (IsPairwise + 1) * |
1633 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1634 | NumVecElts, SubTy); |
1635 | MinMaxCost += |
1636 | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + |
1637 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, |
1638 | nullptr); |
1639 | Ty = SubTy; |
1640 | ++LongVectorCount; |
1641 | } |
1642 | |
1643 | NumReduxLevels -= LongVectorCount; |
1644 | |
1645 | // The minimal length of the vector is limited by the real length of vector |
1646 | // operations performed on the current platform. That's why several final |
1647 | // reduction operations are performed on the vectors with the same
1648 | // architecture-dependent length. |
1649 | |
1650 | // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1651 | // reductions need two shuffles on every level but the last; on that level
1652 | // one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1653 | unsigned NumShuffles = NumReduxLevels; |
1654 | if (IsPairwise && NumReduxLevels >= 1) |
1655 | NumShuffles += NumReduxLevels - 1; |
1656 | ShuffleCost += NumShuffles * |
1657 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1658 | 0, Ty); |
1659 | MinMaxCost += |
1660 | NumReduxLevels * |
1661 | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + |
1662 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
1663 | nullptr)); |
1664 | // The last min/max should be in vector registers and we counted it
1665 | // above, so we just need a single extractelement.
1666 | return ShuffleCost + MinMaxCost + |
1667 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1668 | } |
1669 | |
1670 | unsigned getVectorSplitCost() { return 1; } |
1671 | |
1672 | /// @} |
1673 | }; |
1674 | |
1675 | /// Concrete BasicTTIImpl that can be used if no further customization |
1676 | /// is needed. |
1677 | class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { |
1678 | using BaseT = BasicTTIImplBase<BasicTTIImpl>; |
1679 | |
1680 | friend class BasicTTIImplBase<BasicTTIImpl>; |
1681 | |
1682 | const TargetSubtargetInfo *ST; |
1683 | const TargetLoweringBase *TLI; |
1684 | |
1685 | const TargetSubtargetInfo *getST() const { return ST; } |
1686 | const TargetLoweringBase *getTLI() const { return TLI; } |
1687 | |
1688 | public: |
1689 | explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); |
1690 | }; |
1691 | |
1692 | } // end namespace llvm |
1693 | |
1694 | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |