Line data Source code
1 : //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : /// \file
10 : /// This pass exposes codegen information to IR-level passes. Every
11 : /// transformation that uses codegen information is broken into three parts:
12 : /// 1. The IR-level analysis pass.
13 : /// 2. The IR-level transformation interface which provides the needed
14 : /// information.
15 : /// 3. Codegen-level implementation which uses target-specific hooks.
16 : ///
17 : /// This file defines #2, which is the interface that IR-level transformations
18 : /// use for querying the codegen.
19 : ///
20 : //===----------------------------------------------------------------------===//
21 :
22 : #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 : #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
24 :
25 : #include "llvm/ADT/Optional.h"
26 : #include "llvm/IR/Operator.h"
27 : #include "llvm/IR/PassManager.h"
28 : #include "llvm/Pass.h"
29 : #include "llvm/Support/AtomicOrdering.h"
30 : #include "llvm/Support/DataTypes.h"
31 : #include <functional>
32 :
33 : namespace llvm {
34 :
35 : namespace Intrinsic {
36 : enum ID : unsigned;
37 : }
38 :
39 : class Function;
40 : class GlobalValue;
41 : class IntrinsicInst;
42 : class LoadInst;
43 : class Loop;
44 : class SCEV;
45 : class ScalarEvolution;
46 : class StoreInst;
47 : class SwitchInst;
48 : class Type;
49 : class User;
50 : class Value;
51 :
52 : /// Information about a load/store intrinsic defined by the target.
53 3333800 : struct MemIntrinsicInfo {
54 : /// This is the pointer that the intrinsic is loading from or storing to.
55 : /// If this is non-null, then analysis/optimization passes can assume that
56 : /// this intrinsic is functionally equivalent to a load/store from this
57 : /// pointer.
58 : Value *PtrVal = nullptr;
59 :
60 : // Ordering for atomic operations.
61 : AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
62 :
63 : // Same Id is set by the target for corresponding load/store intrinsics.
64 : unsigned short MatchingId = 0;
65 :
66 : bool ReadMem = false;
67 : bool WriteMem = false;
68 : bool IsVolatile = false;
69 :
70 0 : bool isUnordered() const {
71 806 : return (Ordering == AtomicOrdering::NotAtomic ||
72 806 : Ordering == AtomicOrdering::Unordered) && !IsVolatile;
73 : }
74 : };
75 :
76 : /// This pass provides access to the codegen interfaces that are needed
77 : /// for IR-level transformations.
78 : class TargetTransformInfo {
79 : public:
80 : /// Construct a TTI object using a type implementing the \c Concept
81 : /// API below.
82 : ///
83 : /// This is used by targets to construct a TTI wrapping their target-specific
84 : /// implementation that encodes appropriate costs for their target.
85 : template <typename T> TargetTransformInfo(T Impl);
86 :
87 : /// Construct a baseline TTI object using a minimal implementation of
88 : /// the \c Concept API below.
89 : ///
90 : /// The TTI implementation will reflect the information in the DataLayout
91 : /// provided if non-null.
92 : explicit TargetTransformInfo(const DataLayout &DL);
93 :
94 : // Provide move semantics.
95 : TargetTransformInfo(TargetTransformInfo &&Arg);
96 : TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
97 :
98 : // We need to define the destructor out-of-line to define our sub-classes
99 : // out-of-line.
100 : ~TargetTransformInfo();
101 :
102 : /// Handle the invalidation of this information.
103 : ///
104 : /// When used as a result of \c TargetIRAnalysis this method will be called
105 : /// when the function this was computed for changes. When it returns false,
106 : /// the information is preserved across those changes.
107 0 : bool invalidate(Function &, const PreservedAnalyses &,
108 : FunctionAnalysisManager::Invalidator &) {
109 : // FIXME: We should probably in some way ensure that the subtarget
110 : // information for a function hasn't changed.
111 0 : return false;
112 : }
113 :
114 : /// \name Generic Target Information
115 : /// @{
116 :
117 : /// The kind of cost model.
118 : ///
119 : /// There are several different cost models that can be customized by the
120 : /// target. The normalization of each cost model may be target specific.
121 : enum TargetCostKind {
122 : TCK_RecipThroughput, ///< Reciprocal throughput.
123 : TCK_Latency, ///< The latency of instruction.
124 : TCK_CodeSize ///< Instruction code size.
125 : };
126 :
127 : /// Query the cost of a specified instruction.
128 : ///
129 : /// Clients should use this interface to query the cost of an existing
130 : /// instruction. The instruction must have a valid parent (basic block).
131 : ///
132 : /// Note, this method does not cache the cost calculation and it
133 : /// can be expensive in some cases.
134 27770 : int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
135 27770 : switch (kind){
136 27748 : case TCK_RecipThroughput:
137 30648 : return getInstructionThroughput(I);
138 :
139 11 : case TCK_Latency:
140 11 : return getInstructionLatency(I);
141 :
142 11 : case TCK_CodeSize:
143 234 : return getUserCost(I);
144 : }
145 0 : llvm_unreachable("Unknown instruction cost kind");
146 : }
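/// A minimal usage sketch (illustrative only; the TargetTransformInfo object
/// `TTI` and the existing instruction `I` are assumed to be in scope):
/// \code
///   int Cost = TTI.getInstructionCost(
///       &I, TargetTransformInfo::TCK_RecipThroughput);
///   bool IsFree = Cost == TargetTransformInfo::TCC_Free;
/// \endcode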
147 :
148 : /// Underlying constants for 'cost' values in this interface.
149 : ///
150 : /// Many APIs in this interface return a cost. This enum defines the
151 : /// fundamental values that should be used to interpret (and produce) those
152 : /// costs. The costs are returned as an int rather than a member of this
153 : /// enumeration because it is expected that the cost of one IR instruction
154 : /// may have a multiplicative factor to it or otherwise won't fit directly
155 : /// into the enum. Moreover, it is common to sum or average costs which works
156 : /// better as simple integral values. Thus this enum only provides constants.
157 : /// Also note that the returned costs are signed integers to make it natural
158 : /// to add, subtract, and test with zero (a common boundary condition). It is
159 : /// not expected that 2^32 is a realistic cost to be modeling at any point.
160 : ///
161 : /// Note that these costs should usually reflect the intersection of code-size
162 : /// cost and execution cost. A free instruction is typically one that folds
163 : /// into another instruction. For example, reg-to-reg moves can often be
164 : /// skipped by renaming the registers in the CPU, but they still are encoded
165 : /// and thus wouldn't be considered 'free' here.
166 : enum TargetCostConstants {
167 : TCC_Free = 0, ///< Expected to fold away in lowering.
168 : TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
169 : TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
170 : };
171 :
172 : /// Estimate the cost of a specific operation when lowered.
173 : ///
174 : /// Note that this is designed to work on an arbitrary synthetic opcode, and
175 : /// thus work for hypothetical queries before an instruction has even been
176 : /// formed. However, this does *not* work for GEPs, and must not be called
177 : /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
178 : /// analyzing a GEP's cost requires more information.
179 : ///
180 : /// Typically only the result type is required, and the operand type can be
181 : /// omitted. However, if the opcode is one of the cast instructions, the
182 : /// operand type is required.
183 : ///
184 : /// The returned cost is defined in terms of \c TargetCostConstants, see its
185 : /// comments for a detailed explanation of the cost values.
186 : int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
187 :
188 : /// Estimate the cost of a GEP operation when lowered.
189 : ///
190 : /// The contract for this function is the same as \c getOperationCost except
191 : /// that it supports an interface that provides extra information specific to
192 : /// the GEP operation.
193 : int getGEPCost(Type *PointeeType, const Value *Ptr,
194 : ArrayRef<const Value *> Operands) const;
195 :
196 : /// Estimate the cost of an EXT operation when lowered.
197 : ///
198 : /// The contract for this function is the same as \c getOperationCost except
199 : /// that it supports an interface that provides extra information specific to
200 : /// the EXT operation.
201 : int getExtCost(const Instruction *I, const Value *Src) const;
202 :
203 : /// Estimate the cost of a function call when lowered.
204 : ///
205 : /// The contract for this is the same as \c getOperationCost except that it
206 : /// supports an interface that provides extra information specific to call
207 : /// instructions.
208 : ///
209 : /// This is the most basic query for estimating call cost: it only knows the
210 : /// function type and (potentially) the number of arguments at the call site.
211 : /// The latter is only interesting for varargs function types.
212 : int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
213 :
214 : /// Estimate the cost of calling a specific function when lowered.
215 : ///
216 : /// This overload adds the ability to reason about the particular function
217 : /// being called in the event it is a library call with special lowering.
218 : int getCallCost(const Function *F, int NumArgs = -1) const;
219 :
220 : /// Estimate the cost of calling a specific function when lowered.
221 : ///
222 : /// This overload allows specifying a set of candidate argument values.
223 : int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
224 :
225 : /// \returns A value by which our inlining threshold should be multiplied.
226 : /// This is primarily used to bump up the inlining threshold wholesale on
227 : /// targets where calls are unusually expensive.
228 : ///
229 : /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
230 : /// individual classes of instructions would be better.
231 : unsigned getInliningThresholdMultiplier() const;
232 :
233 : /// Estimate the cost of an intrinsic when lowered.
234 : ///
235 : /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
236 : int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
237 : ArrayRef<Type *> ParamTys) const;
238 :
239 : /// Estimate the cost of an intrinsic when lowered.
240 : ///
241 : /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
242 : int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
243 : ArrayRef<const Value *> Arguments) const;
244 :
245 : /// \return The estimated number of case clusters when lowering \p 'SI'.
246 : /// \p JTSize is set to the jump table size only when \p SI is suitable for a jump
247 : /// table.
248 : unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
249 : unsigned &JTSize) const;
250 :
251 : /// Estimate the cost of a given IR user when lowered.
252 : ///
253 : /// This can estimate the cost of either a ConstantExpr or Instruction when
254 : /// lowered. It has two primary advantages over the \c getOperationCost and
255 : /// \c getGEPCost above, and one significant disadvantage: it can only be
256 : /// used when the IR construct has already been formed.
257 : ///
258 : /// The advantages are that it can inspect the SSA use graph to reason more
259 : /// accurately about the cost. For example, all-constant-GEPs can often be
260 : /// folded into a load or other instruction, but if they are used in some
261 : /// other context they may not be folded. This routine can distinguish such
262 : /// cases.
263 : ///
264 : /// \p Operands is a list of operands which can be a result of transformations
265 : /// of the current operands. The number of operands on the list must equal
266 : /// the number of operands the IR user currently has. Their order on the
267 : /// list must be the same as the order of the IR user's current
268 : /// operands.
269 : ///
270 : /// The returned cost is defined in terms of \c TargetCostConstants, see its
271 : /// comments for a detailed explanation of the cost values.
272 : int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
273 :
274 : /// This is a helper function which calls the two-argument getUserCost
275 : /// with \p Operands which are the current operands U has.
276 4009483 : int getUserCost(const User *U) const {
277 : SmallVector<const Value *, 4> Operands(U->value_op_begin(),
278 4009483 : U->value_op_end());
279 4009483 : return getUserCost(U, Operands);
280 : }
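/// Illustrative sketch only (assumes a TargetTransformInfo `TTI` and a
/// `const User *U` in the caller):
/// \code
///   bool Expensive =
///       TTI.getUserCost(U) >= TargetTransformInfo::TCC_Expensive;
/// \endcode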
281 :
282 : /// Return true if branch divergence exists.
283 : ///
284 : /// Branch divergence has a significantly negative impact on GPU performance
285 : /// when threads in the same wavefront take different paths due to conditional
286 : /// branches.
287 : bool hasBranchDivergence() const;
288 :
289 : /// Returns whether V is a source of divergence.
290 : ///
291 : /// This function provides the target-dependent information for
292 : /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
293 : /// builds the dependency graph, and then runs the reachability algorithm
294 : /// starting with the sources of divergence.
295 : bool isSourceOfDivergence(const Value *V) const;
296 :
297 : /// Returns true for the target-specific set of operations
298 : /// that produce a uniform result even when given
299 : /// non-uniform arguments.
300 : bool isAlwaysUniform(const Value *V) const;
301 :
302 : /// Returns the address space ID for a target's 'flat' address space. Note
303 : /// this is not necessarily the same as addrspace(0), which LLVM sometimes
304 : /// refers to as the generic address space. The flat address space is a
305 : /// generic address space that can be used to access multiple segments of memory
306 : /// with different address spaces. Access of a memory location through a
307 : /// pointer with this address space is expected to be legal but slower
308 : /// compared to the same memory location accessed through a pointer with a
309 : /// different address space.
310 : ///
311 : /// This is for targets with different pointer representations which can
312 : /// be converted with the addrspacecast instruction. If a pointer is converted
313 : /// to this address space, optimizations should attempt to replace the access
314 : /// with the source address space.
315 : ///
316 : /// \returns ~0u if the target does not have such a flat address space to
317 : /// optimize away.
318 : unsigned getFlatAddressSpace() const;
319 :
320 : /// Test whether calls to a function lower to actual program function
321 : /// calls.
322 : ///
323 : /// The idea is to test whether the program is likely to require a 'call'
324 : /// instruction or equivalent in order to call the given function.
325 : ///
326 : /// FIXME: It's not clear that this is a good or useful query API. Clients
327 : /// should probably move to simpler cost metrics using the above.
328 : /// Alternatively, we could split the cost interface into distinct code-size
329 : /// and execution-speed costs. This would allow modelling the core of this
330 : /// query more accurately as a call is a single small instruction, but
331 : /// incurs significant execution cost.
332 : bool isLoweredToCall(const Function *F) const;
333 :
334 : struct LSRCost {
335 : /// TODO: Some of these could be merged. Also, a lexical ordering
336 : /// isn't always optimal.
337 : unsigned Insns;
338 : unsigned NumRegs;
339 : unsigned AddRecCost;
340 : unsigned NumIVMuls;
341 : unsigned NumBaseAdds;
342 : unsigned ImmCost;
343 : unsigned SetupCost;
344 : unsigned ScaleCost;
345 : };
346 :
347 : /// Parameters that control the generic loop unrolling transformation.
348 : struct UnrollingPreferences {
349 : /// The cost threshold for the unrolled loop. Should be relative to the
350 : /// getUserCost values returned by this API, and the expectation is that
351 : /// the unrolled loop's instructions when run through that interface should
352 : /// not exceed this cost. However, this is only an estimate. Also, specific
353 : /// loops may be unrolled even with a cost above this threshold if deemed
354 : /// profitable. Set this to UINT_MAX to disable the loop body cost
355 : /// restriction.
356 : unsigned Threshold;
357 : /// If complete unrolling will reduce the cost of the loop, we will boost
358 : /// the Threshold by a certain percent to allow more aggressive complete
359 : /// unrolling. This value provides the maximum boost percentage that we
360 : /// can apply to Threshold (The value should be no less than 100).
361 : /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
362 : /// MaxPercentThresholdBoost / 100)
363 : /// E.g. if complete unrolling reduces the loop execution time by 50%
364 : /// then we boost the threshold by the factor of 2x. If unrolling is not
365 : /// expected to reduce the running time, then we do not increase the
366 : /// threshold.
367 : unsigned MaxPercentThresholdBoost;
368 : /// The cost threshold for the unrolled loop when optimizing for size (set
369 : /// to UINT_MAX to disable).
370 : unsigned OptSizeThreshold;
371 : /// The cost threshold for the unrolled loop, like Threshold, but used
372 : /// for partial/runtime unrolling (set to UINT_MAX to disable).
373 : unsigned PartialThreshold;
374 : /// The cost threshold for the unrolled loop when optimizing for size, like
375 : /// OptSizeThreshold, but used for partial/runtime unrolling (set to
376 : /// UINT_MAX to disable).
377 : unsigned PartialOptSizeThreshold;
378 : /// A forced unrolling factor (the number of concatenated bodies of the
379 : /// original loop in the unrolled loop body). When set to 0, the unrolling
380 : /// transformation will select an unrolling factor based on the current cost
381 : /// threshold and other factors.
382 : unsigned Count;
383 : /// A forced peeling factor (the number of bodies of the original loop
384 : /// that should be peeled off before the loop body). When set to 0, the
385 : /// unrolling transformation will select a peeling factor based on profile
386 : /// information and other factors.
387 : unsigned PeelCount;
388 : /// Default unroll count for loops with run-time trip count.
389 : unsigned DefaultUnrollRuntimeCount;
390 : // Set the maximum unrolling factor. The unrolling factor may be selected
391 : // using the appropriate cost threshold, but may not exceed this number
392 : // (set to UINT_MAX to disable). This does not apply in cases where the
393 : // loop is being fully unrolled.
394 : unsigned MaxCount;
395 : /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
396 : /// applies even if full unrolling is selected. This allows a target to fall
397 : /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
398 : unsigned FullUnrollMaxCount;
399 : // Represents number of instructions optimized when "back edge"
400 : // becomes "fall through" in unrolled loop.
401 : // For now we count a conditional branch on a backedge and a comparison
402 : // feeding it.
403 : unsigned BEInsns;
404 : /// Allow partial unrolling (unrolling of loops to expand the size of the
405 : /// loop body, not only to eliminate small constant-trip-count loops).
406 : bool Partial;
407 : /// Allow runtime unrolling (unrolling of loops to expand the size of the
408 : /// loop body even when the number of loop iterations is not known at
409 : /// compile time).
410 : bool Runtime;
411 : /// Allow generation of a loop remainder (extra iterations after unroll).
412 : bool AllowRemainder;
413 : /// Allow emitting expensive instructions (such as divisions) when computing
414 : /// the trip count of a loop for runtime unrolling.
415 : bool AllowExpensiveTripCount;
416 : /// Apply loop unroll on any kind of loop
417 : /// (mainly to loops that fail runtime unrolling).
418 : bool Force;
419 : /// Allow using trip count upper bound to unroll loops.
420 : bool UpperBound;
421 : /// Allow peeling off loop iterations for loops with low dynamic tripcount.
422 : bool AllowPeeling;
423 : /// Allow unrolling of all the iterations of the runtime loop remainder.
424 : bool UnrollRemainder;
425 : /// Allow unroll and jam. Used to enable unroll and jam for the target.
426 : bool UnrollAndJam;
427 : /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
428 : /// value above is used during unroll and jam for the outer loop size.
429 : /// This value is used in the same manner to limit the size of the inner
430 : /// loop.
431 : unsigned UnrollAndJamInnerLoopThreshold;
432 : };
433 :
434 : /// Get target-customized preferences for the generic loop unrolling
435 : /// transformation. The caller will initialize UP with the current
436 : /// target-independent defaults.
437 : void getUnrollingPreferences(Loop *L, ScalarEvolution &,
438 : UnrollingPreferences &UP) const;
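/// A hedged sketch of how a client might query these preferences (the
/// `TTI`, `L`, and `SE` objects are assumptions of the example, and the
/// defaults would normally be filled in by the unroller first):
/// \code
///   TargetTransformInfo::UnrollingPreferences UP;
///   // ... initialize UP with target-independent defaults ...
///   TTI.getUnrollingPreferences(L, SE, UP);
///   if (UP.Partial) {
///     // Partial unrolling is allowed, limited by UP.PartialThreshold.
///   }
/// \endcode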
439 :
440 : /// @}
441 :
442 : /// \name Scalar Target Information
443 : /// @{
444 :
445 : /// Flags indicating the kind of support for population count.
446 : ///
447 : /// Compared to the SW implementation, HW support is supposed to
448 : /// significantly boost the performance when the population is dense, and it
449 : /// may or may not degrade performance if the population is sparse. HW
450 : /// support is considered "Fast" if it can outperform, or is on a par
451 : /// with, the SW implementation when the population is sparse; otherwise,
452 : /// it is considered "Slow".
453 : enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
454 :
455 : /// Return true if the specified immediate is legal add immediate, that
456 : /// is the target has add instructions which can add a register with the
457 : /// immediate without having to materialize the immediate into a register.
458 : bool isLegalAddImmediate(int64_t Imm) const;
459 :
460 : /// Return true if the specified immediate is legal icmp immediate,
461 : /// that is the target has icmp instructions which can compare a register
462 : /// against the immediate without having to materialize the immediate into a
463 : /// register.
464 : bool isLegalICmpImmediate(int64_t Imm) const;
465 :
466 : /// Return true if the addressing mode represented by AM is legal for
467 : /// this target, for a load/store of the specified type.
468 : /// The type may be VoidTy, in which case only return true if the addressing
469 : /// mode is legal for a load/store of any legal type.
470 : /// If target returns true in LSRWithInstrQueries(), I may be valid.
471 : /// TODO: Handle pre/postinc as well.
472 : bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
473 : bool HasBaseReg, int64_t Scale,
474 : unsigned AddrSpace = 0,
475 : Instruction *I = nullptr) const;
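/// Illustrative query (the type `Ty` and the chosen constants are
/// hypothetical):
/// \code
///   // Is "base + 4 * index + 16" legal for a load/store of Ty?
///   bool Legal = TTI.isLegalAddressingMode(Ty, /*BaseGV=*/nullptr,
///                                          /*BaseOffset=*/16,
///                                          /*HasBaseReg=*/true,
///                                          /*Scale=*/4);
/// \endcode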
476 :
477 : /// Return true if the LSR cost of C1 is lower than the cost of C2.
478 : bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
479 : TargetTransformInfo::LSRCost &C2) const;
480 :
481 : /// Return true if the target can fuse a compare and branch.
482 : /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
483 : /// calculation for the instructions in a loop.
484 : bool canMacroFuseCmp() const;
485 :
486 : /// \return True if LSR should make efforts to create/preserve post-inc
487 : /// addressing mode expressions.
488 : bool shouldFavorPostInc() const;
489 :
490 : /// Return true if the target supports masked load/store.
491 : /// AVX2 and AVX-512 targets allow masks for consecutive load and store.
492 : bool isLegalMaskedStore(Type *DataType) const;
493 : bool isLegalMaskedLoad(Type *DataType) const;
494 :
495 : /// Return true if the target supports masked gather/scatter.
496 : /// AVX-512 fully supports gather and scatter for vectors with 32- and
497 : /// 64-bit scalar types.
498 : bool isLegalMaskedScatter(Type *DataType) const;
499 : bool isLegalMaskedGather(Type *DataType) const;
500 :
501 : /// Return true if the target has a unified operation to calculate division
502 : /// and remainder. If so, the additional implicit multiplication and
503 : /// subtraction required to calculate a remainder from division are free. This
504 : /// can enable more aggressive transformations for division and remainder than
505 : /// would typically be allowed using throughput or size cost models.
506 : bool hasDivRemOp(Type *DataType, bool IsSigned) const;
507 :
508 : /// Return true if the given instruction (assumed to be a memory access
509 : /// instruction) has a volatile variant. If that's the case then we can avoid
510 : /// addrspacecast to generic AS for volatile loads/stores. Default
511 : /// implementation returns false, which prevents address space inference for
512 : /// volatile loads/stores.
513 : bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
514 :
515 : /// Return true if target doesn't mind addresses in vectors.
516 : bool prefersVectorizedAddressing() const;
517 :
518 : /// Return the cost of the scaling factor used in the addressing
519 : /// mode represented by AM for this target, for a load/store
520 : /// of the specified type.
521 : /// If the AM is supported, the return value must be >= 0.
522 : /// If the AM is not supported, it returns a negative value.
523 : /// TODO: Handle pre/postinc as well.
524 : int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
525 : bool HasBaseReg, int64_t Scale,
526 : unsigned AddrSpace = 0) const;
527 :
528 : /// Return true if the loop strength reduce pass should make
529 : /// Instruction* based TTI queries to isLegalAddressingMode(). This is
530 : /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
531 : /// immediate offset and no index register.
532 : bool LSRWithInstrQueries() const;
533 :
534 : /// Return true if it's free to truncate a value of type Ty1 to type
535 : /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
536 : /// by referencing its sub-register AX.
537 : bool isTruncateFree(Type *Ty1, Type *Ty2) const;
538 :
539 : /// Return true if it is profitable to hoist instruction in the
540 : /// then/else to before if.
541 : bool isProfitableToHoist(Instruction *I) const;
542 :
543 : bool useAA() const;
544 :
545 : /// Return true if this type is legal.
546 : bool isTypeLegal(Type *Ty) const;
547 :
548 : /// Returns the target's jmp_buf alignment in bytes.
549 : unsigned getJumpBufAlignment() const;
550 :
551 : /// Returns the target's jmp_buf size in bytes.
552 : unsigned getJumpBufSize() const;
553 :
554 : /// Return true if switches should be turned into lookup tables for the
555 : /// target.
556 : bool shouldBuildLookupTables() const;
557 :
558 : /// Return true if switches should be turned into lookup tables
559 : /// containing this constant value for the target.
560 : bool shouldBuildLookupTablesForConstant(Constant *C) const;
561 :
562 : /// Return true if the input function, which is cold at all call sites,
563 : /// should use the coldcc calling convention.
564 : bool useColdCCForColdCall(Function &F) const;
565 :
566 : unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
567 :
568 : unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
569 : unsigned VF) const;
570 :
571 : /// If target has efficient vector element load/store instructions, it can
572 : /// return true here so that insertion/extraction costs are not added to
573 : /// the scalarization cost of a load/store.
574 : bool supportsEfficientVectorElementLoadStore() const;
575 :
576 : /// Don't restrict interleaved unrolling to small loops.
577 : bool enableAggressiveInterleaving(bool LoopHasReductions) const;
578 :
579 : /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
580 : /// true if this is the expansion of memcmp(p1, p2, s) == 0.
581 : struct MemCmpExpansionOptions {
582 : // The list of available load sizes (in bytes), sorted in decreasing order.
583 : SmallVector<unsigned, 8> LoadSizes;
584 : };
585 : const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
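/// Hedged usage sketch (assumes a TargetTransformInfo `TTI`):
/// \code
///   if (const auto *Options = TTI.enableMemCmpExpansion(/*IsZeroCmp=*/true))
///     for (unsigned LoadSize : Options->LoadSizes) {
///       // Try to cover the compared region with loads of LoadSize bytes.
///     }
/// \endcode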
586 :
587 : /// Enable matching of interleaved access groups.
588 : bool enableInterleavedAccessVectorization() const;
589 :
590 : /// Enable matching of interleaved access groups that contain predicated
591 : /// accesses and are vectorized using masked vector loads/stores.
592 : bool enableMaskedInterleavedAccessVectorization() const;
593 :
594 : /// Indicate that it is potentially unsafe to automatically vectorize
595 : /// floating-point operations because the semantics of vector and scalar
596 : /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
597 : /// does not support IEEE-754 denormal numbers, while depending on the
598 : /// platform, scalar floating-point math does.
599 : /// This applies to floating-point math operations and calls, not memory
600 : /// operations, shuffles, or casts.
601 : bool isFPVectorizationPotentiallyUnsafe() const;
602 :
603 : /// Determine if the target supports unaligned memory accesses.
604 : bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
605 : unsigned BitWidth, unsigned AddressSpace = 0,
606 : unsigned Alignment = 1,
607 : bool *Fast = nullptr) const;
608 :
609 : /// Return hardware support for population count.
610 : PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
611 :
612 : /// Return true if the hardware has a fast square-root instruction.
613 : bool haveFastSqrt(Type *Ty) const;
614 :
615 : /// Return true if it is faster to check if a floating-point value is NaN
616 : /// (or not-NaN) versus a comparison against a constant FP zero value.
617 : /// Targets should override this if materializing a 0.0 for comparison is
618 : /// generally as cheap as checking for ordered/unordered.
619 : bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
620 :
621 : /// Return the expected cost of supporting the floating point operation
622 : /// of the specified type.
623 : int getFPOpCost(Type *Ty) const;
624 :
625 : /// Return the expected cost of materializing for the given integer
626 : /// immediate of the specified type.
627 : int getIntImmCost(const APInt &Imm, Type *Ty) const;
628 :
629 : /// Return the expected cost of materialization for the given integer
630 : /// immediate of the specified type for a given instruction. The cost can be
631 : /// zero if the immediate can be folded into the specified instruction.
632 : int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
633 : Type *Ty) const;
634 : int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
635 : Type *Ty) const;
636 :
637 : /// Return the expected cost for the given integer when optimising
638 : /// for size. This is different than the other integer immediate cost
639 : /// functions in that it is subtarget agnostic. This is useful when you e.g.
640 : /// target one ISA such as AArch32 but smaller encodings could be possible
641 : /// with another such as Thumb. This return value is used as a penalty when
642 : /// the total cost for a constant is calculated (the bigger the cost, the
643 : /// more beneficial constant hoisting is).
644 : int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
645 : Type *Ty) const;
646 : /// @}
647 :
648 : /// \name Vector Target Information
649 : /// @{
650 :
651 : /// The various kinds of shuffle patterns for vector queries.
652 : enum ShuffleKind {
653 : SK_Broadcast, ///< Broadcast element 0 to all other elements.
654 : SK_Reverse, ///< Reverse the order of the vector.
655 : SK_Select, ///< Selects elements from the corresponding lane of
656 : ///< either source operand. This is equivalent to a
657 : ///< vector select with a constant condition operand.
658 : SK_Transpose, ///< Transpose two vectors.
659 : SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
660 : SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
661 : SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
662 : ///< with any shuffle mask.
663 : SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
664 : ///< shuffle mask.
665 : };
666 :
667 : /// Additional information about an operand's possible values.
668 : enum OperandValueKind {
669 : OK_AnyValue, // Operand can have any value.
670 : OK_UniformValue, // Operand is uniform (splat of a value).
671 : OK_UniformConstantValue, // Operand is uniform constant.
672 : OK_NonUniformConstantValue // Operand is a non uniform constant value.
673 : };
674 :
675 : /// Additional properties of an operand's values.
676 : enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
677 :
678 : /// \return The number of scalar or vector registers that the target has.
679 : /// If 'Vectors' is true, it returns the number of vector registers. If it is
680 : /// set to false, it returns the number of scalar registers.
681 : unsigned getNumberOfRegisters(bool Vector) const;
682 :
683 : /// \return The width of the largest scalar or vector register type.
684 : unsigned getRegisterBitWidth(bool Vector) const;
685 :
686 : /// \return The width of the smallest vector register type.
687 : unsigned getMinVectorRegisterBitWidth() const;
688 :
689 : /// \return True if the vectorization factor should be chosen to
690 : /// make the vector of the smallest element type match the size of a
691 : /// vector register. For wider element types, this could result in
692 : /// creating vectors that span multiple vector registers.
693 : /// If false, the vectorization factor will be chosen based on the
694 : /// size of the widest element type.
695 : bool shouldMaximizeVectorBandwidth(bool OptSize) const;
696 :
697 : /// \return The minimum vectorization factor for types of given element
698 : /// bit width, or 0 if there is no minimum VF. The returned value only
699 : /// applies when shouldMaximizeVectorBandwidth returns true.
700 : unsigned getMinimumVF(unsigned ElemWidth) const;
701 :
702 : /// \return True if it should be considered for address type promotion.
703 : /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
704 : /// profitable without finding other extensions fed by the same input.
705 : bool shouldConsiderAddressTypePromotion(
706 : const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
707 :
708 : /// \return The size of a cache line in bytes.
709 : unsigned getCacheLineSize() const;
710 :
711 : /// The possible cache levels
712 : enum class CacheLevel {
713 : L1D, // The L1 data cache
714 : L2D, // The L2 data cache
715 :
716 : // We currently do not model L3 caches, as their sizes differ widely between
717 : // microarchitectures. Also, we do not yet have a use for L3 cache
718 : // size modeling.
719 : };
720 :
721 : /// \return The size of the cache level in bytes, if available.
722 : llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
723 :
724 : /// \return The associativity of the cache level, if available.
725 : llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
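/// Example of the Optional-returning cache queries (sketch only; the 32 KiB
/// fallback is an assumption of the example):
/// \code
///   llvm::Optional<unsigned> L1Size =
///       TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D);
///   unsigned TileBytes = L1Size ? *L1Size : 32768;
/// \endcode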
726 :
727 : /// \return How much before a load we should place the prefetch instruction.
728 : /// This is currently measured in number of instructions.
729 : unsigned getPrefetchDistance() const;
730 :
731 : /// \return Some HW prefetchers can handle accesses up to a certain constant
732 : /// stride. This is the minimum stride in bytes where it makes sense to start
733 : /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
734 : unsigned getMinPrefetchStride() const;
735 :
736 : /// \return The maximum number of iterations to prefetch ahead. If the
737 : /// required number of iterations is more than this number, no prefetching is
738 : /// performed.
739 : unsigned getMaxPrefetchIterationsAhead() const;
740 :
741 : /// \return The maximum interleave factor that any transform should try to
742 : /// perform for this target. This number depends on the level of parallelism
743 : /// and the number of execution units in the CPU.
744 : unsigned getMaxInterleaveFactor(unsigned VF) const;
745 :
746 : /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
747 : OperandValueKind getOperandInfo(Value *V,
748 : OperandValueProperties &OpProps) const;
749 :
750 : /// This is an approximation of reciprocal throughput of a math/logic op.
751 : /// A higher cost indicates less expected throughput.
752 : /// From Agner Fog's guides, reciprocal throughput is "the average number of
753 : /// clock cycles per instruction when the instructions are not part of a
754 : /// limiting dependency chain."
755 : /// Therefore, costs should be scaled to account for multiple execution units
756 : /// on the target that can process this type of instruction. For example, if
757 : /// there are 5 scalar integer units and 2 vector integer units that can
758 : /// calculate an 'add' in a single cycle, this model should indicate that the
759 : /// cost of the vector add instruction is 2.5 times the cost of the scalar
760 : /// add instruction.
761 : /// \p Args is an optional argument which holds the instruction operands
762 : /// values so the TTI can analyze those values searching for special
763 : /// cases or optimizations based on those values.
764 : int getArithmeticInstrCost(
765 : unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
766 : OperandValueKind Opd2Info = OK_AnyValue,
767 : OperandValueProperties Opd1PropInfo = OP_None,
768 : OperandValueProperties Opd2PropInfo = OP_None,
769 : ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
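/// A hedged sketch of a throughput query for a <4 x i32> add (the `TTI`
/// object and the LLVMContext `Ctx` are assumptions of the example):
/// \code
///   Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
///   int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
/// \endcode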
770 :
771 : /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
772 : /// The index and subtype parameters are used by the subvector insertion and
773 : /// extraction shuffle kinds.
774 : int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
775 : Type *SubTp = nullptr) const;
776 :
777 : /// \return The expected cost of cast instructions, such as bitcast, trunc,
778 : /// zext, etc. If there is an existing instruction that holds Opcode, it
779 : /// may be passed in the 'I' parameter.
780 : int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
781 : const Instruction *I = nullptr) const;
782 :
783 : /// \return The expected cost of a sign- or zero-extended vector extract. Use
784 : /// -1 to indicate that there is no information about the index value.
785 : int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
786 : unsigned Index = -1) const;
787 :
788 : /// \return The expected cost of control-flow related instructions such as
789 : /// Phi, Ret, Br.
790 : int getCFInstrCost(unsigned Opcode) const;
791 :
792 : /// \returns The expected cost of compare and select instructions. If there
793 : /// is an existing instruction that holds Opcode, it may be passed in the
794 : /// 'I' parameter.
795 : int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
796 : Type *CondTy = nullptr, const Instruction *I = nullptr) const;
797 :
798 : /// \return The expected cost of vector Insert and Extract.
799 : /// Use -1 to indicate that there is no information on the index value.
800 : int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
801 :
802 : /// \return The cost of Load and Store instructions.
803 : int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
804 : unsigned AddressSpace, const Instruction *I = nullptr) const;
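/// Illustrative query (the i32 type `Int32Ty` is an assumption of the
/// example):
/// \code
///   // Cost of a 4-byte-aligned i32 load from address space 0.
///   int LoadCost = TTI.getMemoryOpCost(Instruction::Load, Int32Ty,
///                                      /*Alignment=*/4, /*AddressSpace=*/0);
/// \endcode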
805 :
806 : /// \return The cost of masked Load and Store instructions.
807 : int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
808 : unsigned AddressSpace) const;
809 :
810 : /// \return The cost of a Gather or Scatter operation.
811 : /// \p Opcode - the kind of memory access (Load or Store)
812 : /// \p DataTy - a vector type of the data to be loaded or stored
813 : /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
814 : /// \p VariableMask - true when the memory access is predicated with a mask
815 : /// that is not a compile-time constant
816 : /// \p Alignment - alignment of single element
817 : int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
818 : bool VariableMask, unsigned Alignment) const;
819 :
820 : /// \return The cost of the interleaved memory operation.
821 : /// \p Opcode is the memory operation code
822 : /// \p VecTy is the vector type of the interleaved access.
823 : /// \p Factor is the interleave factor
824 : /// \p Indices is the indices for interleaved load members (as interleaved
825 : /// load allows gaps)
826 : /// \p Alignment is the alignment of the memory operation
827 : /// \p AddressSpace is address space of the pointer.
828 : /// \p IsMasked indicates if the memory access is predicated.
829 : int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
830 : ArrayRef<unsigned> Indices, unsigned Alignment,
831 : unsigned AddressSpace,
832 : bool IsMasked = false) const;
833 :
834 : /// Calculate the cost of performing a vector reduction.
835 : ///
836 : /// This is the cost of reducing the vector value of type \p Ty to a scalar
837 : /// value using the operation denoted by \p Opcode. The form of the reduction
838 : /// can either be a pairwise reduction or a reduction that splits the vector
839 : /// at every reduction level.
840 : ///
841 : /// Pairwise:
842 : /// (v0, v1, v2, v3)
843 : /// ((v0+v1), (v2+v3), undef, undef)
844 : /// Split:
845 : /// (v0, v1, v2, v3)
846 : /// ((v0+v2), (v1+v3), undef, undef)
847 : int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
848 : bool IsPairwiseForm) const;
849 : int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
850 : bool IsUnsigned) const;
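/// Hedged sketch comparing the two reduction forms for an integer add over
/// a vector type `VecTy` (assumed to be in scope):
/// \code
///   int SplitCost = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
///                                                  /*IsPairwiseForm=*/false);
///   int PairwiseCost = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
///                                                     /*IsPairwiseForm=*/true);
/// \endcode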
851 :
852 : /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
853 : /// Three cases are handled: 1. scalar instruction 2. vector instruction
854 : /// 3. scalar instruction which is to be vectorized with VF.
855 : int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
856 : ArrayRef<Value *> Args, FastMathFlags FMF,
857 : unsigned VF = 1) const;
858 :
859 : /// \returns The cost of Intrinsic instructions. Types analysis only.
860 : /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
861 : /// arguments and the return value will be computed based on types.
862 : int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
863 : ArrayRef<Type *> Tys, FastMathFlags FMF,
864 : unsigned ScalarizationCostPassed = UINT_MAX) const;
865 :
866 : /// \returns The cost of Call instructions.
867 : int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
868 :
869 : /// \returns The number of pieces into which the provided type must be
870 : /// split during legalization. Zero is returned when the answer is unknown.
871 : unsigned getNumberOfParts(Type *Tp) const;
872 :
873 : /// \returns The cost of the address computation. For most targets this can be
874 : /// merged into the instruction indexing mode. Some targets might want to
875 : /// distinguish between address computation for memory operations on vector
876 : /// types and scalar types. Such targets should override this function.
877 : /// The 'SE' parameter holds a pointer to the scalar evolution object which
878 : /// is used to get the step value of 'Ptr' in the case of a constant stride.
879 : /// The 'Ptr' parameter holds the SCEV of the access pointer.
880 : int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
881 : const SCEV *Ptr = nullptr) const;
882 :
883 : /// \returns The cost, if any, of keeping values of the given types alive
884 : /// over a callsite.
885 : ///
886 : /// Some types may require the use of register classes that do not have
887 : /// any callee-saved registers, so would require a spill and fill.
888 : unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
889 :
890 : /// \returns True if the intrinsic is a supported memory intrinsic. Info
891 : /// will contain additional information - whether the intrinsic may read
892 : /// or write memory, its volatility, and the pointer. Info is undefined
893 : /// if false is returned.
894 : bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
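/// Illustrative sketch (assumes an IntrinsicInst pointer `II` and a
/// TargetTransformInfo `TTI`):
/// \code
///   MemIntrinsicInfo Info;
///   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered()) {
///     // Treat II like an ordinary, unordered load/store of Info.PtrVal.
///   }
/// \endcode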
895 :
896 : /// \returns The maximum element size, in bytes, for an element
897 : /// unordered-atomic memory intrinsic.
898 : unsigned getAtomicMemIntrinsicMaxElementSize() const;
899 :
900 : /// \returns A value which is the result of the given memory intrinsic. New
901 : /// instructions may be created to extract the result from the given intrinsic
902 : /// memory operation. Returns nullptr if the target cannot create a result
903 : /// from the given intrinsic.
904 : Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
905 : Type *ExpectedType) const;
906 :
907 : /// \returns The type to use in a loop expansion of a memcpy call.
908 : Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
909 : unsigned SrcAlign, unsigned DestAlign) const;
910 :
911 : /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
912 : /// \param RemainingBytes The number of bytes to copy.
913 : ///
914 : /// Calculates the operand types to use when copying \p RemainingBytes of
915 : /// memory, where source and destination alignments are \p SrcAlign and
916 : /// \p DestAlign respectively.
917 : void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
918 : LLVMContext &Context,
919 : unsigned RemainingBytes,
920 : unsigned SrcAlign,
921 : unsigned DestAlign) const;
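/// A hedged sketch of the memcpy-lowering queries (the context `Ctx`, the
/// `Length` value, and the chosen alignments are assumptions of the example):
/// \code
///   Type *CopyTy = TTI.getMemcpyLoopLoweringType(Ctx, Length,
///                                                /*SrcAlign=*/4,
///                                                /*DestAlign=*/4);
///   SmallVector<Type *, 4> ResidualTys;
///   TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
///                                         /*RemainingBytes=*/7,
///                                         /*SrcAlign=*/4, /*DestAlign=*/4);
/// \endcode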
922 :
923 : /// \returns True if the two functions have compatible attributes for inlining
924 : /// purposes.
925 : bool areInlineCompatible(const Function *Caller,
926 : const Function *Callee) const;
927 :
928 : /// The type of load/store indexing.
929 : enum MemIndexedMode {
930 : MIM_Unindexed, ///< No indexing.
931 : MIM_PreInc, ///< Pre-incrementing.
932 : MIM_PreDec, ///< Pre-decrementing.
933 : MIM_PostInc, ///< Post-incrementing.
934 : MIM_PostDec ///< Post-decrementing.
935 : };
936 :
937 : /// \returns True if the specified indexed load for the given type is legal.
938 : bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
939 :
940 : /// \returns True if the specified indexed store for the given type is legal.
941 : bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
942 :
943 : /// \returns The bitwidth of the largest vector type that should be used to
944 : /// load/store in the given address space.
945 : unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
946 :
947 : /// \returns True if the load instruction is legal to vectorize.
948 : bool isLegalToVectorizeLoad(LoadInst *LI) const;
949 :
950 : /// \returns True if the store instruction is legal to vectorize.
951 : bool isLegalToVectorizeStore(StoreInst *SI) const;
952 :
953 : /// \returns True if it is legal to vectorize the given load chain.
954 : bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
955 : unsigned Alignment,
956 : unsigned AddrSpace) const;
957 :
958 : /// \returns True if it is legal to vectorize the given store chain.
959 : bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
960 : unsigned Alignment,
961 : unsigned AddrSpace) const;
962 :
963 : /// \returns The new vector factor value if the target doesn't support \p
964 : /// SizeInBytes loads or has a better vector factor.
965 : unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
966 : unsigned ChainSizeInBytes,
967 : VectorType *VecTy) const;
968 :
969 : /// \returns The new vector factor value if the target doesn't support \p
970 : /// SizeInBytes stores or has a better vector factor.
971 : unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
972 : unsigned ChainSizeInBytes,
973 : VectorType *VecTy) const;
974 :
975 : /// Flags describing the kind of vector reduction.
976 : struct ReductionFlags {
977 203 : ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
978 : bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation.
979 : bool IsSigned; ///< Whether the operation is a signed int reduction.
980 : bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
981 : };
982 :
983 : /// \returns True if the target wants to handle the given reduction idiom in
984 : /// the intrinsics form instead of the shuffle form.
985 : bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
986 : ReductionFlags Flags) const;
987 :
988 : /// \returns True if the target wants to expand the given reduction intrinsic
989 : /// into a shuffle sequence.
990 : bool shouldExpandReduction(const IntrinsicInst *II) const;
991 : /// @}
992 :
993 : private:
994 : /// Estimate the latency of specified instruction.
995 : /// Returns 1 as the default value.
996 : int getInstructionLatency(const Instruction *I) const;
997 :
998 : /// Returns the expected throughput cost of the instruction.
999 : /// Returns -1 if the cost is unknown.
1000 : int getInstructionThroughput(const Instruction *I) const;
1001 :
1002 : /// The abstract base class used to type erase specific TTI
1003 : /// implementations.
1004 : class Concept;
1005 :
1006 : /// The template model for the base class which wraps a concrete
1007 : /// implementation in a type erased interface.
1008 : template <typename T> class Model;
1009 :
1010 : std::unique_ptr<Concept> TTIImpl;
1011 : };
1012 :
1013 : class TargetTransformInfo::Concept {
1014 : public:
1015 : virtual ~Concept() = 0;
1016 : virtual const DataLayout &getDataLayout() const = 0;
1017 : virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
1018 : virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1019 : ArrayRef<const Value *> Operands) = 0;
1020 : virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
1021 : virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
1022 : virtual int getCallCost(const Function *F, int NumArgs) = 0;
1023 : virtual int getCallCost(const Function *F,
1024 : ArrayRef<const Value *> Arguments) = 0;
1025 : virtual unsigned getInliningThresholdMultiplier() = 0;
1026 : virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1027 : ArrayRef<Type *> ParamTys) = 0;
1028 : virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1029 : ArrayRef<const Value *> Arguments) = 0;
1030 : virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1031 : unsigned &JTSize) = 0;
1032 : virtual int
1033 : getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1034 : virtual bool hasBranchDivergence() = 0;
1035 : virtual bool isSourceOfDivergence(const Value *V) = 0;
1036 : virtual bool isAlwaysUniform(const Value *V) = 0;
1037 : virtual unsigned getFlatAddressSpace() = 0;
1038 : virtual bool isLoweredToCall(const Function *F) = 0;
1039 : virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1040 : UnrollingPreferences &UP) = 0;
1041 : virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1042 : virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1043 : virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1044 : int64_t BaseOffset, bool HasBaseReg,
1045 : int64_t Scale,
1046 : unsigned AddrSpace,
1047 : Instruction *I) = 0;
1048 : virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1049 : TargetTransformInfo::LSRCost &C2) = 0;
1050 : virtual bool canMacroFuseCmp() = 0;
1051 : virtual bool shouldFavorPostInc() const = 0;
1052 : virtual bool isLegalMaskedStore(Type *DataType) = 0;
1053 : virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1054 : virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1055 : virtual bool isLegalMaskedGather(Type *DataType) = 0;
1056 : virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1057 : virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1058 : virtual bool prefersVectorizedAddressing() = 0;
1059 : virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1060 : int64_t BaseOffset, bool HasBaseReg,
1061 : int64_t Scale, unsigned AddrSpace) = 0;
1062 : virtual bool LSRWithInstrQueries() = 0;
1063 : virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1064 : virtual bool isProfitableToHoist(Instruction *I) = 0;
1065 : virtual bool useAA() = 0;
1066 : virtual bool isTypeLegal(Type *Ty) = 0;
1067 : virtual unsigned getJumpBufAlignment() = 0;
1068 : virtual unsigned getJumpBufSize() = 0;
1069 : virtual bool shouldBuildLookupTables() = 0;
1070 : virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1071 : virtual bool useColdCCForColdCall(Function &F) = 0;
1072 : virtual unsigned
1073 : getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1074 : virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1075 : unsigned VF) = 0;
1076 : virtual bool supportsEfficientVectorElementLoadStore() = 0;
1077 : virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1078 : virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1079 : bool IsZeroCmp) const = 0;
1080 : virtual bool enableInterleavedAccessVectorization() = 0;
1081 : virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1082 : virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1083 : virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1084 : unsigned BitWidth,
1085 : unsigned AddressSpace,
1086 : unsigned Alignment,
1087 : bool *Fast) = 0;
1088 : virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1089 : virtual bool haveFastSqrt(Type *Ty) = 0;
1090 : virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1091 : virtual int getFPOpCost(Type *Ty) = 0;
1092 : virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1093 : Type *Ty) = 0;
1094 : virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1095 : virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1096 : Type *Ty) = 0;
1097 : virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1098 : Type *Ty) = 0;
1099 : virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1100 : virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1101 : virtual unsigned getMinVectorRegisterBitWidth() = 0;
1102 : virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1103 : virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1104 : virtual bool shouldConsiderAddressTypePromotion(
1105 : const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1106 : virtual unsigned getCacheLineSize() = 0;
1107 : virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1108 : virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1109 : virtual unsigned getPrefetchDistance() = 0;
1110 : virtual unsigned getMinPrefetchStride() = 0;
1111 : virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1112 : virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1113 : virtual unsigned
1114 : getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1115 : OperandValueKind Opd2Info,
1116 : OperandValueProperties Opd1PropInfo,
1117 : OperandValueProperties Opd2PropInfo,
1118 : ArrayRef<const Value *> Args) = 0;
1119 : virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1120 : Type *SubTp) = 0;
1121 : virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1122 : const Instruction *I) = 0;
1123 : virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1124 : VectorType *VecTy, unsigned Index) = 0;
1125 : virtual int getCFInstrCost(unsigned Opcode) = 0;
1126 : virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1127 : Type *CondTy, const Instruction *I) = 0;
1128 : virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1129 : unsigned Index) = 0;
1130 : virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1131 : unsigned AddressSpace, const Instruction *I) = 0;
1132 : virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1133 : unsigned Alignment,
1134 : unsigned AddressSpace) = 0;
1135 : virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1136 : Value *Ptr, bool VariableMask,
1137 : unsigned Alignment) = 0;
1138 : virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1139 : unsigned Factor,
1140 : ArrayRef<unsigned> Indices,
1141 : unsigned Alignment,
1142 : unsigned AddressSpace,
1143 : bool IsMasked = false) = 0;
1144 : virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1145 : bool IsPairwiseForm) = 0;
1146 : virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1147 : bool IsPairwiseForm, bool IsUnsigned) = 0;
1148 : virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1149 : ArrayRef<Type *> Tys, FastMathFlags FMF,
1150 : unsigned ScalarizationCostPassed) = 0;
1151 : virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1152 : ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1153 : virtual int getCallInstrCost(Function *F, Type *RetTy,
1154 : ArrayRef<Type *> Tys) = 0;
1155 : virtual unsigned getNumberOfParts(Type *Tp) = 0;
1156 : virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1157 : const SCEV *Ptr) = 0;
1158 : virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1159 : virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1160 : MemIntrinsicInfo &Info) = 0;
1161 : virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1162 : virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1163 : Type *ExpectedType) = 0;
1164 : virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1165 : unsigned SrcAlign,
1166 : unsigned DestAlign) const = 0;
1167 : virtual void getMemcpyLoopResidualLoweringType(
1168 : SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1169 : unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1170 : virtual bool areInlineCompatible(const Function *Caller,
1171 : const Function *Callee) const = 0;
1172 : virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1173 : virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1174 : virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1175 : virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1176 : virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1177 : virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1178 : unsigned Alignment,
1179 : unsigned AddrSpace) const = 0;
1180 : virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1181 : unsigned Alignment,
1182 : unsigned AddrSpace) const = 0;
1183 : virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1184 : unsigned ChainSizeInBytes,
1185 : VectorType *VecTy) const = 0;
1186 : virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1187 : unsigned ChainSizeInBytes,
1188 : VectorType *VecTy) const = 0;
1189 : virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1190 : ReductionFlags) const = 0;
1191 : virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1192 : virtual int getInstructionLatency(const Instruction *I) = 0;
1193 : };
1194 :
1195 : template <typename T>
1196 : class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1197 : T Impl;
1198 :
1199 : public:
1200 3073624 : Model(T Impl) : Impl(std::move(Impl)) {}
1201 3932259 : ~Model() override {}
1202 :
1203 0 : const DataLayout &getDataLayout() const override {
1204 7637 : return Impl.getDataLayout();
1205 : }
1206 0 :
1207 0 : int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1208 0 : return Impl.getOperationCost(Opcode, Ty, OpTy);
1209 0 : }
1210 369 : int getGEPCost(Type *PointeeType, const Value *Ptr,
1211 : ArrayRef<const Value *> Operands) override {
1212 369 : return Impl.getGEPCost(PointeeType, Ptr, Operands);
1213 0 : }
1214 0 : int getExtCost(const Instruction *I, const Value *Src) override {
1215 0 : return Impl.getExtCost(I, Src);
1216 0 : }
1217 0 : int getCallCost(FunctionType *FTy, int NumArgs) override {
1218 0 : return Impl.getCallCost(FTy, NumArgs);
1219 0 : }
1220 0 : int getCallCost(const Function *F, int NumArgs) override {
1221 0 : return Impl.getCallCost(F, NumArgs);
1222 34068 : }
1223 0 : int getCallCost(const Function *F,
1224 34068 : ArrayRef<const Value *> Arguments) override {
1225 0 : return Impl.getCallCost(F, Arguments);
1226 1823 : }
1227 288234 : unsigned getInliningThresholdMultiplier() override {
1228 290057 : return Impl.getInliningThresholdMultiplier();
1229 : }
1230 32245 : int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1231 : ArrayRef<Type *> ParamTys) override {
1232 32245 : return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
1233 : }
1234 4 : int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1235 0 : ArrayRef<const Value *> Arguments) override {
1236 4 : return Impl.getIntrinsicCost(IID, RetTy, Arguments);
1237 0 : }
1238 4065703 : int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1239 4065703 : return Impl.getUserCost(U, Operands);
1240 0 : }
1241 44000 : bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1242 90 : bool isSourceOfDivergence(const Value *V) override {
1243 90 : return Impl.isSourceOfDivergence(V);
1244 0 : }
1245 :
1246 28 : bool isAlwaysUniform(const Value *V) override {
1247 28 : return Impl.isAlwaysUniform(V);
1248 : }
1249 0 :
1250 1056 : unsigned getFlatAddressSpace() override {
1251 1056 : return Impl.getFlatAddressSpace();
1252 0 : }
1253 0 :
1254 997312 : bool isLoweredToCall(const Function *F) override {
1255 997312 : return Impl.isLoweredToCall(F);
1256 0 : }
1257 8845 : void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1258 0 : UnrollingPreferences &UP) override {
1259 8845 : return Impl.getUnrollingPreferences(L, SE, UP);
1260 : }
1261 25984 : bool isLegalAddImmediate(int64_t Imm) override {
1262 25984 : return Impl.isLegalAddImmediate(Imm);
1263 0 : }
1264 28650 : bool isLegalICmpImmediate(int64_t Imm) override {
1265 28650 : return Impl.isLegalICmpImmediate(Imm);
1266 : }
1267 570583 : bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1268 : bool HasBaseReg, int64_t Scale,
1269 0 : unsigned AddrSpace,
1270 : Instruction *I) override {
1271 0 : return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1272 570583 : Scale, AddrSpace, I);
1273 112 : }
1274 120262 : bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1275 : TargetTransformInfo::LSRCost &C2) override {
1276 120150 : return Impl.isLSRCostLess(C1, C2);
1277 0 : }
1278 34139 : bool canMacroFuseCmp() override {
1279 34251 : return Impl.canMacroFuseCmp();
1280 112 : }
1281 256451 : bool shouldFavorPostInc() const override {
1282 256451 : return Impl.shouldFavorPostInc();
1283 : }
1284 553 : bool isLegalMaskedStore(Type *DataType) override {
1285 553 : return Impl.isLegalMaskedStore(DataType);
1286 0 : }
1287 521 : bool isLegalMaskedLoad(Type *DataType) override {
1288 521 : return Impl.isLegalMaskedLoad(DataType);
1289 : }
1290 675 : bool isLegalMaskedScatter(Type *DataType) override {
1291 675 : return Impl.isLegalMaskedScatter(DataType);
1292 0 : }
1293 820 : bool isLegalMaskedGather(Type *DataType) override {
1294 820 : return Impl.isLegalMaskedGather(DataType);
1295 : }
1296 66 : bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1297 66 : return Impl.hasDivRemOp(DataType, IsSigned);
1298 0 : }
1299 330 : bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1300 330 : return Impl.hasVolatileVariant(I, AddrSpace);
1301 : }
1302 1407 : bool prefersVectorizedAddressing() override {
1303 1407 : return Impl.prefersVectorizedAddressing();
1304 0 : }
1305 125812 : int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1306 8969 : bool HasBaseReg, int64_t Scale,
1307 8969 : unsigned AddrSpace) override {
1308 : return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1309 126194 : Scale, AddrSpace);
1310 382 : }
1311 71140 : bool LSRWithInstrQueries() override {
1312 79727 : return Impl.LSRWithInstrQueries();
1313 8587 : }
1314 20899 : bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1315 121881 : return Impl.isTruncateFree(Ty1, Ty2);
1316 1444647 : }
1317 1450538 : bool isProfitableToHoist(Instruction *I) override {
1318 5891 : return Impl.isProfitableToHoist(I);
1319 0 : }
1320 552 : bool useAA() override { return Impl.useAA(); }
1321 767 : bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1322 1444647 : unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1323 1444647 : unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1324 1279 : bool shouldBuildLookupTables() override {
1325 1279 : return Impl.shouldBuildLookupTables();
1326 254054 : }
1327 257649 : bool shouldBuildLookupTablesForConstant(Constant *C) override {
1328 3595 : return Impl.shouldBuildLookupTablesForConstant(C);
1329 0 : }
1330 5 : bool useColdCCForColdCall(Function &F) override {
1331 5 : return Impl.useColdCCForColdCall(F);
1332 254054 : }
1333 254054 :
1334 837 : unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1335 : bool Extract) override {
1336 23061 : return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1337 22224 : }
1338 885 : unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1339 2382 : unsigned VF) override {
1340 3267 : return Impl.getOperandsScalarizationOverhead(Args, VF);
1341 : }
1342 19842 :
1343 20334 : bool supportsEfficientVectorElementLoadStore() override {
1344 492 : return Impl.supportsEfficientVectorElementLoadStore();
1345 : }
1346 363 :
1347 445 : bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1348 82 : return Impl.enableAggressiveInterleaving(LoopHasReductions);
1349 0 : }
1350 176872 : const MemCmpExpansionOptions *enableMemCmpExpansion(
1351 : bool IsZeroCmp) const override {
1352 177235 : return Impl.enableMemCmpExpansion(IsZeroCmp);
1353 363 : }
1354 979 : bool enableInterleavedAccessVectorization() override {
1355 999 : return Impl.enableInterleavedAccessVectorization();
1356 : }
1357 490 : bool enableMaskedInterleavedAccessVectorization() override {
1358 470 : return Impl.enableMaskedInterleavedAccessVectorization();
1359 3 : }
1360 158 : bool isFPVectorizationPotentiallyUnsafe() override {
1361 161 : return Impl.isFPVectorizationPotentiallyUnsafe();
1362 : }
1363 62 : bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1364 : unsigned BitWidth, unsigned AddressSpace,
1365 17 : unsigned Alignment, bool *Fast) override {
1366 : return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1367 365 : Alignment, Fast);
1368 320 : }
1369 3577 : PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1370 3591 : return Impl.getPopcntSupport(IntTyWidthInBit);
1371 14 : }
1372 58 : bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1373 306 :
1374 333 : bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1375 27 : return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1376 544 : }
1377 544 :
1378 535 : int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1379 74 :
1380 256 : int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1381 : Type *Ty) override {
1382 652 : return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1383 470 : }
1384 84 : int getIntImmCost(const APInt &Imm, Type *Ty) override {
1385 11110 : return Impl.getIntImmCost(Imm, Ty);
1386 : }
1387 516391 : int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1388 : Type *Ty) override {
1389 516456 : return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1390 11026 : }
1391 154016 : int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1392 523 : Type *Ty) override {
1393 154020 : return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1394 : }
1395 242165 : unsigned getNumberOfRegisters(bool Vector) override {
1396 242165 : return Impl.getNumberOfRegisters(Vector);
1397 523 : }
1398 13652 : unsigned getRegisterBitWidth(bool Vector) const override {
1399 24155 : return Impl.getRegisterBitWidth(Vector);
1400 : }
1401 11812 : unsigned getMinVectorRegisterBitWidth() override {
1402 11892 : return Impl.getMinVectorRegisterBitWidth();
1403 : }
1404 11406 : bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1405 903 : return Impl.shouldMaximizeVectorBandwidth(OptSize);
1406 942 : }
1407 2 : unsigned getMinimumVF(unsigned ElemWidth) const override {
1408 944 : return Impl.getMinimumVF(ElemWidth);
1409 : }
1410 32528 : bool shouldConsiderAddressTypePromotion(
1411 : const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1412 74 : return Impl.shouldConsiderAddressTypePromotion(
1413 32454 : I, AllowPromotionWithoutCommonHeader);
1414 868 : }
1415 51 : unsigned getCacheLineSize() override {
1416 919 : return Impl.getCacheLineSize();
1417 : }
1418 347 : llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1419 347 : return Impl.getCacheSize(Level);
1420 : }
1421 49 : llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1422 49 : return Impl.getCacheAssociativity(Level);
1423 : }
1424 21855 : unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1425 378 : unsigned getMinPrefetchStride() override {
1426 78 : return Impl.getMinPrefetchStride();
1427 2797 : }
1428 3026 : unsigned getMaxPrefetchIterationsAhead() override {
1429 229 : return Impl.getMaxPrefetchIterationsAhead();
1430 178 : }
1431 2189 : unsigned getMaxInterleaveFactor(unsigned VF) override {
1432 2044 : return Impl.getMaxInterleaveFactor(VF);
1433 2619 : }
1434 10615 : unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1435 : unsigned &JTSize) override {
1436 7998 : return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1437 2 : }
1438 : unsigned
1439 210389 : getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1440 0 : OperandValueKind Opd2Info,
1441 : OperandValueProperties Opd1PropInfo,
1442 2 : OperandValueProperties Opd2PropInfo,
1443 2 : ArrayRef<const Value *> Args) override {
1444 : return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1445 210389 : Opd1PropInfo, Opd2PropInfo, Args);
1446 0 : }
1447 5312 : int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1448 0 : Type *SubTp) override {
1449 5312 : return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1450 : }
1451 6903 : int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1452 0 : const Instruction *I) override {
1453 6903 : return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1454 2 : }
1455 34 : int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1456 : unsigned Index) override {
1457 32 : return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1458 0 : }
1459 11273 : int getCFInstrCost(unsigned Opcode) override {
1460 11275 : return Impl.getCFInstrCost(Opcode);
1461 2 : }
1462 5550 : int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1463 0 : const Instruction *I) override {
1464 5550 : return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1465 : }
1466 87172 : int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1467 87172 : return Impl.getVectorInstrCost(Opcode, Val, Index);
1468 : }
1469 395277 : int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1470 0 : unsigned AddressSpace, const Instruction *I) override {
1471 395277 : return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1472 0 : }
1473 141 : int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1474 : unsigned AddressSpace) override {
1475 141 : return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1476 0 : }
1477 132 : int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1478 0 : Value *Ptr, bool VariableMask,
1479 0 : unsigned Alignment) override {
1480 : return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1481 401 : Alignment);
1482 269 : }
1483 92 : int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1484 0 : ArrayRef<unsigned> Indices, unsigned Alignment,
1485 0 : unsigned AddressSpace, bool IsMasked) override {
1486 : return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1487 361 : Alignment, AddressSpace, IsMasked);
1488 269 : }
1489 261 : int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1490 3 : bool IsPairwiseForm) override {
1491 264 : return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1492 : }
1493 898 : int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1494 0 : bool IsPairwiseForm, bool IsUnsigned) override {
1495 898 : return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1496 3 : }
1497 949 : int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1498 : FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1499 444 : return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1500 946 : ScalarizationCostPassed);
1501 : }
1502 2837 : int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1503 444 : ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1504 2837 : return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1505 10 : }
1506 554 : int getCallInstrCost(Function *F, Type *RetTy,
1507 : ArrayRef<Type *> Tys) override {
1508 554 : return Impl.getCallInstrCost(F, RetTy, Tys);
1509 10 : }
1510 22477 : unsigned getNumberOfParts(Type *Tp) override {
1511 22911 : return Impl.getNumberOfParts(Tp);
1512 : }
1513 1582 : int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1514 : const SCEV *Ptr) override {
1515 2016 : return Impl.getAddressComputationCost(Ty, SE, Ptr);
1516 : }
1517 14248 : unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1518 14248 : return Impl.getCostOfKeepingLiveOverCall(Tys);
1519 : }
1520 380973 : bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1521 9 : MemIntrinsicInfo &Info) override {
1522 380964 : return Impl.getTgtMemIntrinsic(Inst, Info);
1523 453 : }
1524 462 : unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1525 9 : return Impl.getAtomicMemIntrinsicMaxElementSize();
1526 475 : }
1527 495 : Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1528 : Type *ExpectedType) override {
1529 46 : return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1530 26 : }
1531 15 : Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1532 449 : unsigned SrcAlign,
1533 449 : unsigned DestAlign) const override {
1534 15 : return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1535 0 : }
1536 0 : void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1537 : LLVMContext &Context,
1538 0 : unsigned RemainingBytes,
1539 0 : unsigned SrcAlign,
1540 : unsigned DestAlign) const override {
1541 0 : Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1542 0 : SrcAlign, DestAlign);
1543 0 : }
1544 334696 : bool areInlineCompatible(const Function *Caller,
1545 2 : const Function *Callee) const override {
1546 334696 : return Impl.areInlineCompatible(Caller, Callee);
1547 0 : }
1548 7649 : bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1549 7649 : return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1550 : }
1551 6 : bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1552 6 : return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1553 : }
1554 809 : unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1555 809 : return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1556 : }
1557 371 : bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1558 371 : return Impl.isLegalToVectorizeLoad(LI);
1559 : }
1560 332 : bool isLegalToVectorizeStore(StoreInst *SI) const override {
1561 332 : return Impl.isLegalToVectorizeStore(SI);
1562 : }
1563 34 : bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1564 0 : unsigned Alignment,
1565 : unsigned AddrSpace) const override {
1566 0 : return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1567 34 : AddrSpace);
1568 : }
1569 11 : bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1570 0 : unsigned Alignment,
1571 : unsigned AddrSpace) const override {
1572 0 : return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1573 11 : AddrSpace);
1574 : }
1575 94 : unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1576 0 : unsigned ChainSizeInBytes,
1577 : VectorType *VecTy) const override {
1578 94 : return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1579 : }
1580 42 : unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1581 : unsigned ChainSizeInBytes,
1582 0 : VectorType *VecTy) const override {
1583 42 : return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1584 0 : }
1585 306 : bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1586 0 : ReductionFlags Flags) const override {
1587 306 : return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1588 0 : }
1589 1741 : bool shouldExpandReduction(const IntrinsicInst *II) const override {
1590 1741 : return Impl.shouldExpandReduction(II);
1591 : }
1592 11 : int getInstructionLatency(const Instruction *I) override {
1593 11 : return Impl.getInstructionLatency(I);
1594 0 : }
1595 : };
1596 0 :
1597 : template <typename T>
1598 190309 : TargetTransformInfo::TargetTransformInfo(T Impl)
1599 2622743 : : TTIImpl(new Model<T>(Impl)) {}
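: // Illustrative sketch (not part of this header): a target typically produces
: // its TTI by handing a target-specific implementation of the Concept API to
: // this constructor, which type-erases it behind Model<T>. "MyTargetMachine"
: // and "MyTTIImpl" below are hypothetical stand-ins for a concrete target:
: //
: //   TargetTransformInfo
: //   MyTargetMachine::getTargetTransformInfo(const Function &F) {
: //     return TargetTransformInfo(MyTTIImpl(this, F));
: //   }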
1600 :
1601 0 : /// Analysis pass providing the \c TargetTransformInfo.
1602 0 : ///
1603 : /// The core idea of the TargetIRAnalysis is to expose an interface through
1604 0 : /// which LLVM targets can analyze and provide information about the middle
1605 0 : /// end's target-independent IR. This supports use cases such as target-aware
1606 : /// cost modeling of IR constructs.
1607 0 : ///
1608 0 : /// This is a function analysis because much of the cost modeling for targets
1609 : /// is done in a subtarget specific way and LLVM supports compiling different
1610 : /// functions targeting different subtargets in order to support runtime
1611 3 : /// dispatch according to the observed subtarget.
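: ///
: /// Illustrative sketch of querying this analysis under the new pass manager;
: /// "MyPass" is a hypothetical pass, but getResult<TargetIRAnalysis> is the
: /// standard way to obtain the per-function TTI:
: ///
: /// \code
: ///   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
: ///     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
: ///     // ... consult TTI cost hooks while transforming F ...
: ///     return PreservedAnalyses::all();
: ///   }
: /// \endcode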
1612 3 : class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1613 : public:
1614 0 : typedef TargetTransformInfo Result;
1615 0 :
1616 : /// Default construct a target IR analysis.
1617 3 : ///
1618 3 : /// This will use the module's datalayout to construct a baseline
1619 : /// conservative TTI result.
1620 21444 : TargetIRAnalysis();
1621 :
1622 21444 : /// Construct an IR analysis pass around a target-provided callback.
1623 : ///
1624 2297 : /// The callback will be called with a particular function for which the TTI
1625 : /// is needed and must return a TTI object for that function.
1626 2297 : TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
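:
: // Illustrative sketch of using this constructor, assuming a TargetMachine-like
: // object "TM" that can build a per-function TTI (the lambda body is an
: // assumption, not something defined in this file):
: //
: //   TargetIRAnalysis TIRA([&](const Function &F) {
: //     return TM.getTargetTransformInfo(F);
: //   });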
1627 :
1628 19147 : // Value semantics. We spell out the constructors for MSVC.
1629 : TargetIRAnalysis(const TargetIRAnalysis &Arg)
1630 19147 : : TTICallback(Arg.TTICallback) {}
1631 : TargetIRAnalysis(TargetIRAnalysis &&Arg)
1632 8 : : TTICallback(std::move(Arg.TTICallback)) {}
1633 8 : TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1634 : TTICallback = RHS.TTICallback;
1635 0 : return *this;
1636 0 : }
1637 : TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1638 8 : TTICallback = std::move(RHS.TTICallback);
1639 8 : return *this;
1640 : }
1641 0 :
1642 0 : Result run(const Function &F, FunctionAnalysisManager &);
1643 :
1644 0 : private:
1645 0 : friend AnalysisInfoMixin<TargetIRAnalysis>;
1646 : static AnalysisKey Key;
1647 0 :
1648 0 : /// The callback used to produce a result.
1649 : ///
1650 2 : /// We use a completely opaque callback so that targets can provide whatever
1651 2 : /// mechanism they desire for constructing the TTI for a given function.
1652 : ///
1653 0 : /// FIXME: Should we really use std::function? It's relatively inefficient.
1654 0 : /// It might be possible to arrange for even stateful callbacks to outlive
1655 : /// the analysis and thus use a function_ref which would be lighter weight.
1656 2 : /// This may also be less error prone as the callback is likely to reference
1657 2 : /// the external TargetMachine, and that reference needs to never dangle.
1658 : std::function<Result(const Function &)> TTICallback;
1659 8305 :
1660 : /// Helper function used as the callback in the default constructor.
1661 : static Result getDefaultTTI(const Function &F);
1662 : };
1663 8305 :
1664 : /// Wrapper pass for TargetTransformInfo.
1665 118 : ///
1666 : /// This pass can be constructed from a TTI object which it stores internally
1667 : /// and is queried by passes.
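: ///
: /// Illustrative sketch of the usual legacy-pass-manager usage; "MyLegacyPass"
: /// is hypothetical, but the addRequired/getAnalysis/getTTI pattern is the
: /// standard one:
: ///
: /// \code
: ///   void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
: ///     AU.addRequired<TargetTransformInfoWrapperPass>();
: ///   }
: ///   bool MyLegacyPass::runOnFunction(Function &F) {
: ///     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
: ///     // ... use TTI to cost candidate transformations ...
: ///     return false;
: ///   }
: /// \endcode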
1668 : class TargetTransformInfoWrapperPass : public ImmutablePass {
1669 118 : TargetIRAnalysis TIRA;
1670 : Optional<TargetTransformInfo> TTI;
1671 8187 :
1672 : virtual void anchor();
1673 :
1674 : public:
1675 8187 : static char ID;
1676 :
1677 7 : /// We must provide a default constructor for the pass but it should
1678 7 : /// never be used.
1679 : ///
1680 0 : /// Use the constructor below or call one of the creation routines.
1681 0 : TargetTransformInfoWrapperPass();
1682 :
1683 7 : explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1684 7 :
1685 : TargetTransformInfo &getTTI(const Function &F);
1686 0 : };
1687 :
1688 0 : /// Create an analysis pass wrapper around a TTI object.
1689 0 : ///
1690 : /// This analysis pass just holds the TTI instance and makes it available to
1691 0 : /// clients.
1692 0 : ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
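: //
: // Illustrative sketch of wiring this into a legacy pipeline; it assumes a
: // TargetMachine *TM whose getTargetIRAnalysis() supplies the callback-based
: // analysis (the common in-tree pattern for targets of this era):
: //
: //   legacy::PassManager PM;
: //   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));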
1693 :
1694 0 : } // End llvm namespace
1695 0 :
1696 : #endif