LLVM 6.0.0svn
TargetTransformInfo.h
1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This pass exposes codegen information to IR-level passes. Every
11 /// transformation that uses codegen information is broken into three parts:
12 /// 1. The IR-level analysis pass.
13 /// 2. The IR-level transformation interface which provides the needed
14 /// information.
15 /// 3. Codegen-level implementation which uses target-specific hooks.
16 ///
17 /// This file defines #2, which is the interface that IR-level transformations
18 /// use for querying the codegen.
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
24 
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/IR/Operator.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/AtomicOrdering.h"
30 #include "llvm/Support/DataTypes.h"
31 #include <functional>
32 
33 namespace llvm {
34 
35 namespace Intrinsic {
36 enum ID : unsigned;
37 }
38 
39 class Function;
40 class GlobalValue;
41 class IntrinsicInst;
42 class LoadInst;
43 class Loop;
44 class SCEV;
45 class ScalarEvolution;
46 class StoreInst;
47 class SwitchInst;
48 class Type;
49 class User;
50 class Value;
51 
52 /// \brief Information about a load/store intrinsic defined by the target.
53 struct MemIntrinsicInfo {
54  /// This is the pointer that the intrinsic is loading from or storing to.
55  /// If this is non-null, then analysis/optimization passes can assume that
56  /// this intrinsic is functionally equivalent to a load/store from this
57  /// pointer.
58  Value *PtrVal = nullptr;
59 
60  // Ordering for atomic operations.
61  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
62 
63  // Same Id is set by the target for corresponding load/store intrinsics.
64  unsigned short MatchingId = 0;
65 
66  bool ReadMem = false;
67  bool WriteMem = false;
68  bool IsVolatile = false;
69 
70  bool isUnordered() const {
71  return (Ordering == AtomicOrdering::NotAtomic ||
72  Ordering == AtomicOrdering::Unordered) && !IsVolatile;
73  }
74 };
75 
76 /// \brief This pass provides access to the codegen interfaces that are needed
77 /// for IR-level transformations.
78 class TargetTransformInfo {
79 public:
80  /// \brief Construct a TTI object using a type implementing the \c Concept
81  /// API below.
82  ///
83  /// This is used by targets to construct a TTI wrapping their target-specific
84  /// implementation that encodes appropriate costs for their target.
85  template <typename T> TargetTransformInfo(T Impl);
86 
87  /// \brief Construct a baseline TTI object using a minimal implementation of
88  /// the \c Concept API below.
89  ///
90  /// The TTI implementation will reflect the information in the DataLayout
91  /// provided if non-null.
92  explicit TargetTransformInfo(const DataLayout &DL);
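  // A minimal usage sketch (the names F and TTI are assumed here, not part of
  // this header): a client without access to TargetIRAnalysis can still build
  // a conservative, DataLayout-only TTI object:
  //
  //   const DataLayout &DL = F.getParent()->getDataLayout();
  //   TargetTransformInfo TTI(DL);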
93 
94  // Provide move semantics.
95  TargetTransformInfo(TargetTransformInfo &&Arg);
96  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
97 
98  // We need to define the destructor out-of-line to define our sub-classes
99  // out-of-line.
100  ~TargetTransformInfo();
101 
102  /// \brief Handle the invalidation of this information.
103  ///
104  /// When used as a result of \c TargetIRAnalysis this method will be called
105  /// when the function this was computed for changes. When it returns false,
106  /// the information is preserved across those changes.
107  bool invalidate(Function &, const PreservedAnalyses &,
108  FunctionAnalysisManager::Invalidator &) {
109  // FIXME: We should probably in some way ensure that the subtarget
110  // information for a function hasn't changed.
111  return false;
112  }
113 
114  /// \name Generic Target Information
115  /// @{
116 
117  /// \brief The kind of cost model.
118  ///
119  /// There are several different cost models that can be customized by the
120  /// target. The normalization of each cost model may be target specific.
121  enum TargetCostKind {
122  TCK_RecipThroughput, ///< Reciprocal throughput.
123  TCK_Latency, ///< The latency of the instruction.
124  TCK_CodeSize ///< Instruction code size.
125  };
126 
127  /// \brief Query the cost of a specified instruction.
128  ///
129  /// Clients should use this interface to query the cost of an existing
130  /// instruction. The instruction must have a valid parent (basic block).
131  ///
132  /// Note, this method does not cache the cost calculation and it
133  /// can be expensive in some cases.
134  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
135  switch (kind) {
136  case TCK_RecipThroughput:
137  return getInstructionThroughput(I);
138 
139  case TCK_Latency:
140  return getInstructionLatency(I);
141 
142  case TCK_CodeSize:
143  return getUserCost(I);
144  }
145  llvm_unreachable("Unknown instruction cost kind");
146  }
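  // Illustrative sketch (assumes a TargetTransformInfo &TTI and an
  // Instruction *I with a valid parent block): the same instruction can be
  // priced under different cost models, e.g.
  //
  //   int Size = TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
  //   int Tput =
  //       TTI.getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);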
147 
148  /// \brief Underlying constants for 'cost' values in this interface.
149  ///
150  /// Many APIs in this interface return a cost. This enum defines the
151  /// fundamental values that should be used to interpret (and produce) those
152  /// costs. The costs are returned as an int rather than a member of this
153  /// enumeration because it is expected that the cost of one IR instruction
154  /// may have a multiplicative factor to it or otherwise won't fit directly
155  /// into the enum. Moreover, it is common to sum or average costs which works
156  /// better as simple integral values. Thus this enum only provides constants.
157  /// Also note that the returned costs are signed integers to make it natural
158  /// to add, subtract, and test with zero (a common boundary condition). It is
159  /// not expected that 2^32 is a realistic cost to be modeling at any point.
160  ///
161  /// Note that these costs should usually reflect the intersection of code-size
162  /// cost and execution cost. A free instruction is typically one that folds
163  /// into another instruction. For example, reg-to-reg moves can often be
164  /// skipped by renaming the registers in the CPU, but they still are encoded
165  /// and thus wouldn't be considered 'free' here.
166  enum TargetCostConstants {
167  TCC_Free = 0, ///< Expected to fold away in lowering.
168  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
169  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
170  };
171 
172  /// \brief Estimate the cost of a specific operation when lowered.
173  ///
174  /// Note that this is designed to work on an arbitrary synthetic opcode, and
175  /// thus work for hypothetical queries before an instruction has even been
176  /// formed. However, this does *not* work for GEPs, and must not be called
177  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
178  /// analyzing a GEP's cost requires more information.
179  ///
180  /// Typically only the result type is required, and the operand type can be
181  /// omitted. However, if the opcode is one of the cast instructions, the
182  /// operand type is required.
183  ///
184  /// The returned cost is defined in terms of \c TargetCostConstants, see its
185  /// comments for a detailed explanation of the cost values.
186  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
187 
188  /// \brief Estimate the cost of a GEP operation when lowered.
189  ///
190  /// The contract for this function is the same as \c getOperationCost except
191  /// that it supports an interface that provides extra information specific to
192  /// the GEP operation.
193  int getGEPCost(Type *PointeeType, const Value *Ptr,
194  ArrayRef<const Value *> Operands) const;
195 
196  /// \brief Estimate the cost of an EXT operation when lowered.
197  ///
198  /// The contract for this function is the same as \c getOperationCost except
199  /// that it supports an interface that provides extra information specific to
200  /// the EXT operation.
201  int getExtCost(const Instruction *I, const Value *Src) const;
202 
203  /// \brief Estimate the cost of a function call when lowered.
204  ///
205  /// The contract for this is the same as \c getOperationCost except that it
206  /// supports an interface that provides extra information specific to call
207  /// instructions.
208  ///
209  /// This is the most basic query for estimating call cost: it only knows the
210  /// function type and (potentially) the number of arguments at the call site.
211  /// The latter is only interesting for varargs function types.
212  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
213 
214  /// \brief Estimate the cost of calling a specific function when lowered.
215  ///
216  /// This overload adds the ability to reason about the particular function
217  /// being called in the event it is a library call with special lowering.
218  int getCallCost(const Function *F, int NumArgs = -1) const;
219 
220  /// \brief Estimate the cost of calling a specific function when lowered.
221  ///
222  /// This overload allows specifying a set of candidate argument values.
223  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
224 
225  /// \returns A value by which our inlining threshold should be multiplied.
226  /// This is primarily used to bump up the inlining threshold wholesale on
227  /// targets where calls are unusually expensive.
228  ///
229  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
230  /// individual classes of instructions would be better.
231  unsigned getInliningThresholdMultiplier() const;
232 
233  /// \brief Estimate the cost of an intrinsic when lowered.
234  ///
235  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
236  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
237  ArrayRef<Type *> ParamTys) const;
238 
239  /// \brief Estimate the cost of an intrinsic when lowered.
240  ///
241  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
242  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
243  ArrayRef<const Value *> Arguments) const;
244 
245  /// \return The estimated number of case clusters when lowering \p 'SI'.
246  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
247  /// table.
248  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
249  unsigned &JTSize) const;
250 
251  /// \brief Estimate the cost of a given IR user when lowered.
252  ///
253  /// This can estimate the cost of either a ConstantExpr or Instruction when
254  /// lowered. It has two primary advantages over the \c getOperationCost and
255  /// \c getGEPCost above, and one significant disadvantage: it can only be
256  /// used when the IR construct has already been formed.
257  ///
258  /// The advantages are that it can inspect the SSA use graph to reason more
259  /// accurately about the cost. For example, all-constant-GEPs can often be
260  /// folded into a load or other instruction, but if they are used in some
261  /// other context they may not be folded. This routine can distinguish such
262  /// cases.
263  ///
264  /// \p Operands is a list of operands which can be a result of
265  /// transformations of the current operands. The number of operands on
266  /// the list must be equal to the number of operands the IR user
267  /// currently has, and their order on the list must be the same as the
268  /// order of the IR user's current operands.
269  ///
270  /// The returned cost is defined in terms of \c TargetCostConstants, see its
271  /// comments for a detailed explanation of the cost values.
272  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
273 
274  /// \brief This is a helper function which calls the two-argument getUserCost
275  /// with \p Operands which are the current operands U has.
276  int getUserCost(const User *U) const {
277  SmallVector<const Value *, 4> Operands(U->value_op_begin(),
278  U->value_op_end());
279  return getUserCost(U, Operands);
280  }
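  // Illustrative sketch (GEP, NewIdx, and TTI are assumed names): a transform
  // can price a user as if its operands had already been rewritten by passing
  // the prospective operands explicitly:
  //
  //   SmallVector<const Value *, 4> Ops(GEP->value_op_begin(),
  //                                     GEP->value_op_end());
  //   Ops[1] = NewIdx; // hypothetical replacement operand
  //   int Cost = TTI.getUserCost(GEP, Ops);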
281 
282  /// \brief Return true if branch divergence exists.
283  ///
284  /// Branch divergence has a significantly negative impact on GPU performance
285  /// when threads in the same wavefront take different paths due to conditional
286  /// branches.
287  bool hasBranchDivergence() const;
288 
289  /// \brief Returns whether V is a source of divergence.
290  ///
291  /// This function provides the target-dependent information for
292  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
293  /// builds the dependency graph, and then runs the reachability algorithm
294  /// starting with the sources of divergence.
295  bool isSourceOfDivergence(const Value *V) const;
296 
297  // \brief Returns true for the target-specific set of operations
298  // which produce a uniform result even when taking non-uniform
299  // arguments.
300  bool isAlwaysUniform(const Value *V) const;
301 
302  /// Returns the address space ID for a target's 'flat' address space. Note
303  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
304  /// refers to as the generic address space. The flat address space is a
305  /// generic address space that can be used to access multiple segments of memory
306  /// with different address spaces. Access of a memory location through a
307  /// pointer with this address space is expected to be legal but slower
308  /// compared to the same memory location accessed through a pointer with a
309  /// different address space.
310  //
311  /// This is for targets with different pointer representations which can
312  /// be converted with the addrspacecast instruction. If a pointer is converted
313  /// to this address space, optimizations should attempt to replace the access
314  /// with the source address space.
315  ///
316  /// \returns ~0u if the target does not have such a flat address space to
317  /// optimize away.
318  unsigned getFlatAddressSpace() const;
319 
320  /// \brief Test whether calls to a function lower to actual program function
321  /// calls.
322  ///
323  /// The idea is to test whether the program is likely to require a 'call'
324  /// instruction or equivalent in order to call the given function.
325  ///
326  /// FIXME: It's not clear that this is a good or useful query API. Clients
327  /// should probably move to simpler cost metrics using the above.
328  /// Alternatively, we could split the cost interface into distinct code-size
329  /// and execution-speed costs. This would allow modelling the core of this
330  /// query more accurately as a call is a single small instruction, but
331  /// incurs significant execution cost.
332  bool isLoweredToCall(const Function *F) const;
333 
334  struct LSRCost {
335  /// TODO: Some of these could be merged. Also, a lexical ordering
336  /// isn't always optimal.
337  unsigned Insns;
338  unsigned NumRegs;
339  unsigned AddRecCost;
340  unsigned NumIVMuls;
341  unsigned NumBaseAdds;
342  unsigned ImmCost;
343  unsigned SetupCost;
344  unsigned ScaleCost;
345  };
346 
347  /// Parameters that control the generic loop unrolling transformation.
348  struct UnrollingPreferences {
349  /// The cost threshold for the unrolled loop. Should be relative to the
350  /// getUserCost values returned by this API, and the expectation is that
351  /// the unrolled loop's instructions when run through that interface should
352  /// not exceed this cost. However, this is only an estimate. Also, specific
353  /// loops may be unrolled even with a cost above this threshold if deemed
354  /// profitable. Set this to UINT_MAX to disable the loop body cost
355  /// restriction.
356  unsigned Threshold;
357  /// If complete unrolling will reduce the cost of the loop, we will boost
358  /// the Threshold by a certain percent to allow more aggressive complete
359  /// unrolling. This value provides the maximum boost percentage that we
360  /// can apply to Threshold (The value should be no less than 100).
361  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
362  /// MaxPercentThresholdBoost / 100)
363  /// E.g. if complete unrolling reduces the loop execution time by 50%
364  /// then we boost the threshold by the factor of 2x. If unrolling is not
365  /// expected to reduce the running time, then we do not increase the
366  /// threshold.
367  unsigned MaxPercentThresholdBoost;
368  /// The cost threshold for the unrolled loop when optimizing for size (set
369  /// to UINT_MAX to disable).
370  unsigned OptSizeThreshold;
371  /// The cost threshold for the unrolled loop, like Threshold, but used
372  /// for partial/runtime unrolling (set to UINT_MAX to disable).
373  unsigned PartialThreshold;
374  /// The cost threshold for the unrolled loop when optimizing for size, like
375  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
376  /// UINT_MAX to disable).
377  unsigned PartialOptSizeThreshold;
378  /// A forced unrolling factor (the number of concatenated bodies of the
379  /// original loop in the unrolled loop body). When set to 0, the unrolling
380  /// transformation will select an unrolling factor based on the current cost
381  /// threshold and other factors.
382  unsigned Count;
383  /// A forced peeling factor (the number of bodies of the original loop
384  /// that should be peeled off before the loop body). When set to 0, the
385  /// unrolling transformation will select a peeling factor based on profile
386  /// information and other factors.
387  unsigned PeelCount;
388  /// Default unroll count for loops with run-time trip count.
389  unsigned DefaultUnrollRuntimeCount;
390  // Set the maximum unrolling factor. The unrolling factor may be selected
391  // using the appropriate cost threshold, but may not exceed this number
392  // (set to UINT_MAX to disable). This does not apply in cases where the
393  // loop is being fully unrolled.
394  unsigned MaxCount;
395  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
396  /// applies even if full unrolling is selected. This allows a target to fall
397  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
398  unsigned FullUnrollMaxCount;
399  // Represents number of instructions optimized when "back edge"
400  // becomes "fall through" in unrolled loop.
401  // For now we count a conditional branch on a backedge and a comparison
402  // feeding it.
403  unsigned BEInsns;
404  /// Allow partial unrolling (unrolling of loops to expand the size of the
405  /// loop body, not only to eliminate small constant-trip-count loops).
406  bool Partial;
407  /// Allow runtime unrolling (unrolling of loops to expand the size of the
408  /// loop body even when the number of loop iterations is not known at
409  /// compile time).
410  bool Runtime;
411  /// Allow generation of a loop remainder (extra iterations after unroll).
412  bool AllowRemainder;
413  /// Allow emitting expensive instructions (such as divisions) when computing
414  /// the trip count of a loop for runtime unrolling.
415  bool AllowExpensiveTripCount;
416  /// Apply loop unroll on any kind of loop
417  /// (mainly to loops that fail runtime unrolling).
418  bool Force;
419  /// Allow using trip count upper bound to unroll loops.
420  bool UpperBound;
421  /// Allow peeling off loop iterations for loops with low dynamic tripcount.
422  bool AllowPeeling;
423  /// Allow unrolling of all the iterations of the runtime loop remainder.
424  bool UnrollRemainder;
425  };
426 
427  /// \brief Get target-customized preferences for the generic loop unrolling
428  /// transformation. The caller will initialize UP with the current
429  /// target-independent defaults.
430  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
431  UnrollingPreferences &UP) const;
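  // A minimal caller-side sketch (L, SE, and TTI are assumed names; the real
  // unroller seeds UP with its target-independent defaults first):
  //
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   // ... fill UP with the generic defaults ...
  //   TTI.getUnrollingPreferences(L, SE, UP);
  //   if (UP.Partial)
  //     ; // consider partial unrolling, bounded by UP.Threshold / UP.MaxCount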
432 
433  /// @}
434 
435  /// \name Scalar Target Information
436  /// @{
437 
438  /// \brief Flags indicating the kind of support for population count.
439  ///
440  /// Compared to the SW implementation, HW support is supposed to
441  /// significantly boost the performance when the population is dense, and it
442  /// may or may not degrade performance if the population is sparse. HW
443  /// support is considered "Fast" if it can outperform, or is on a par
444  /// with, the SW implementation when the population is sparse; otherwise,
445  /// it is considered "Slow".
446  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
447 
448  /// \brief Return true if the specified immediate is a legal add immediate,
449  /// that is, the target has add instructions which can add a register with
450  /// the immediate without having to materialize the immediate into a register.
451  bool isLegalAddImmediate(int64_t Imm) const;
452 
453  /// \brief Return true if the specified immediate is a legal icmp immediate,
454  /// that is, the target has icmp instructions which can compare a register
455  /// against the immediate without having to materialize the immediate into a
456  /// register.
457  bool isLegalICmpImmediate(int64_t Imm) const;
458 
459  /// \brief Return true if the addressing mode represented by AM is legal for
460  /// this target, for a load/store of the specified type.
461  /// The type may be VoidTy, in which case only return true if the addressing
462  /// mode is legal for a load/store of any legal type.
463  /// If target returns true in LSRWithInstrQueries(), I may be valid.
464  /// TODO: Handle pre/postinc as well.
465  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
466  bool HasBaseReg, int64_t Scale,
467  unsigned AddrSpace = 0,
468  Instruction *I = nullptr) const;
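  // Illustrative sketch (AccessTy and TTI are assumed names): checking whether
  // a "base register + scaled index + offset" form is directly supported:
  //
  //   bool Legal = TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/4);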
469 
470  /// \brief Return true if the LSR cost of C1 is lower than the cost of C2.
471  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
472  TargetTransformInfo::LSRCost &C2) const;
473 
474  /// \brief Return true if the target supports masked load/store.
475  /// AVX2 and AVX-512 targets allow masks for consecutive load and store.
476  bool isLegalMaskedStore(Type *DataType) const;
477  bool isLegalMaskedLoad(Type *DataType) const;
478 
479  /// \brief Return true if the target supports masked gather/scatter.
480  /// AVX-512 fully supports gather and scatter for vectors with 32- and
481  /// 64-bit scalar types.
482  bool isLegalMaskedScatter(Type *DataType) const;
483  bool isLegalMaskedGather(Type *DataType) const;
484 
485  /// Return true if the target has a unified operation to calculate division
486  /// and remainder. If so, the additional implicit multiplication and
487  /// subtraction required to calculate a remainder from division are free. This
488  /// can enable more aggressive transformations for division and remainder than
489  /// would typically be allowed using throughput or size cost models.
490  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
491 
492  /// Return true if the given instruction (assumed to be a memory access
493  /// instruction) has a volatile variant. If that's the case then we can avoid
494  /// addrspacecast to generic AS for volatile loads/stores. Default
495  /// implementation returns false, which prevents address space inference for
496  /// volatile loads/stores.
497  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
498 
499  /// Return true if target doesn't mind addresses in vectors.
500  bool prefersVectorizedAddressing() const;
501 
502  /// \brief Return the cost of the scaling factor used in the addressing
503  /// mode represented by AM for this target, for a load/store
504  /// of the specified type.
505  /// If the AM is supported, the return value must be >= 0.
506  /// If the AM is not supported, it returns a negative value.
507  /// TODO: Handle pre/postinc as well.
508  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
509  bool HasBaseReg, int64_t Scale,
510  unsigned AddrSpace = 0) const;
511 
512  /// \brief Return true if the loop strength reduce pass should make
513  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
514  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
515  /// immediate offset and no index register.
516  bool LSRWithInstrQueries() const;
517 
518  /// \brief Return true if it's free to truncate a value of type Ty1 to type
519  /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
520  /// by referencing its sub-register AX.
521  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
522 
523  /// \brief Return true if it is profitable to hoist an instruction in the
524  /// then/else blocks to before the if.
525  bool isProfitableToHoist(Instruction *I) const;
526 
527  /// \brief Return true if this type is legal.
528  bool isTypeLegal(Type *Ty) const;
529 
530  /// \brief Returns the target's jmp_buf alignment in bytes.
531  unsigned getJumpBufAlignment() const;
532 
533  /// \brief Returns the target's jmp_buf size in bytes.
534  unsigned getJumpBufSize() const;
535 
536  /// \brief Return true if switches should be turned into lookup tables for the
537  /// target.
538  bool shouldBuildLookupTables() const;
539 
540  /// \brief Return true if switches should be turned into lookup tables
541  /// containing this constant value for the target.
542  bool shouldBuildLookupTablesForConstant(Constant *C) const;
543 
544  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
545 
546  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
547  unsigned VF) const;
548 
549  /// If target has efficient vector element load/store instructions, it can
550  /// return true here so that insertion/extraction costs are not added to
551  /// the scalarization cost of a load/store.
552  bool supportsEfficientVectorElementLoadStore() const;
553 
554  /// \brief Don't restrict interleaved unrolling to small loops.
555  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
556 
557  /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
558  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
559  struct MemCmpExpansionOptions {
560  // The list of available load sizes (in bytes), sorted in decreasing order.
561  SmallVector<unsigned, 8> LoadSizes;
562  };
563  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
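  // Illustrative sketch (TTI is an assumed name): a nullptr result means the
  // target does not want memcmp expanded inline; otherwise the options list
  // the load widths the expansion may use:
  //
  //   if (const auto *Options = TTI.enableMemCmpExpansion(/*IsZeroCmp=*/true))
  //     for (unsigned LoadSize : Options->LoadSizes)
  //       ; // emit loads of LoadSize bytes, largest first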
564 
565  /// \brief Enable matching of interleaved access groups.
566  bool enableInterleavedAccessVectorization() const;
567 
568  /// \brief Indicate that it is potentially unsafe to automatically vectorize
569  /// floating-point operations because vector and scalar floating-point
570  /// semantics may differ. For example, ARM NEON v7 SIMD math
571  /// does not support IEEE-754 denormal numbers, while depending on the
572  /// platform, scalar floating-point math does.
573  /// This applies to floating-point math operations and calls, not memory
574  /// operations, shuffles, or casts.
575  bool isFPVectorizationPotentiallyUnsafe() const;
576 
577  /// \brief Determine if the target supports unaligned memory accesses.
578  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
579  unsigned BitWidth, unsigned AddressSpace = 0,
580  unsigned Alignment = 1,
581  bool *Fast = nullptr) const;
582 
583  /// \brief Return hardware support for population count.
584  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
585 
586  /// \brief Return true if the hardware has a fast square-root instruction.
587  bool haveFastSqrt(Type *Ty) const;
588 
589  /// \brief Return the expected cost of supporting the floating point operation
590  /// of the specified type.
591  int getFPOpCost(Type *Ty) const;
592 
593  /// \brief Return the expected cost of materializing for the given integer
594  /// immediate of the specified type.
595  int getIntImmCost(const APInt &Imm, Type *Ty) const;
596 
597  /// \brief Return the expected cost of materialization for the given integer
598  /// immediate of the specified type for a given instruction. The cost can be
599  /// zero if the immediate can be folded into the specified instruction.
600  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
601  Type *Ty) const;
602  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
603  Type *Ty) const;
604 
605  /// \brief Return the expected cost for the given integer when optimising
606  /// for size. This is different than the other integer immediate cost
607  /// functions in that it is subtarget agnostic. This is useful when you e.g.
608  /// target one ISA such as AArch32 but smaller encodings could be possible
609  /// with another such as Thumb. This return value is used as a penalty when
610  /// the total cost of a constant is calculated (the bigger the cost, the
611  /// more beneficial constant hoisting is).
612  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
613  Type *Ty) const;
614  /// @}
615 
616  /// \name Vector Target Information
617  /// @{
618 
619  /// \brief The various kinds of shuffle patterns for vector queries.
620  enum ShuffleKind {
621  SK_Broadcast, ///< Broadcast element 0 to all other elements.
622  SK_Reverse, ///< Reverse the order of the vector.
623  SK_Alternate, ///< Choose alternate elements from vector.
624  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
625  SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
626  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
627  ///< with any shuffle mask.
628  SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
629  ///< shuffle mask.
630  };
631 
632  /// \brief Additional information about an operand's possible values.
633  enum OperandValueKind {
634  OK_AnyValue, // Operand can have any value.
635  OK_UniformValue, // Operand is uniform (splat of a value).
636  OK_UniformConstantValue, // Operand is uniform constant.
637  OK_NonUniformConstantValue // Operand is a non uniform constant value.
638  };
639 
640  /// \brief Additional properties of an operand's values.
641  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
642 
643  /// \return The number of scalar or vector registers that the target has.
644  /// If 'Vectors' is true, it returns the number of vector registers. If it is
645  /// set to false, it returns the number of scalar registers.
646  unsigned getNumberOfRegisters(bool Vector) const;
647 
648  /// \return The width of the largest scalar or vector register type.
649  unsigned getRegisterBitWidth(bool Vector) const;
650 
651  /// \return The width of the smallest vector register type.
652  unsigned getMinVectorRegisterBitWidth() const;
653 
654  /// \return True if it should be considered for address type promotion.
655  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
656  /// profitable without finding other extensions fed by the same input.
657  bool shouldConsiderAddressTypePromotion(
658  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
659 
660  /// \return The size of a cache line in bytes.
661  unsigned getCacheLineSize() const;
662 
663  /// The possible cache levels
664  enum class CacheLevel {
665  L1D, // The L1 data cache
666  L2D, // The L2 data cache
667 
668  // We currently do not model L3 caches, as their sizes differ widely between
669  // microarchitectures. Also, we currently do not have a use for L3 cache
670  // size modeling yet.
671  };
672 
673  /// \return The size of the cache level in bytes, if available.
674  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
675 
676  /// \return The associativity of the cache level, if available.
677  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
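  // Illustrative sketch (TTI is an assumed name): both queries return
  // llvm::Optional, so callers must handle targets that report nothing:
  //
  //   if (auto L1Size = TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
  //     ; // *L1Size is the L1 data cache size in bytes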
678 
679  /// \return How much before a load we should place the prefetch instruction.
680  /// This is currently measured in number of instructions.
681  unsigned getPrefetchDistance() const;
682 
683  /// \return Some HW prefetchers can handle accesses up to a certain constant
684  /// stride. This is the minimum stride in bytes where it makes sense to start
685  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
686  unsigned getMinPrefetchStride() const;
687 
688  /// \return The maximum number of iterations to prefetch ahead. If the
689  /// required number of iterations is more than this number, no prefetching is
690  /// performed.
691  unsigned getMaxPrefetchIterationsAhead() const;
692 
693  /// \return The maximum interleave factor that any transform should try to
694  /// perform for this target. This number depends on the level of parallelism
695  /// and the number of execution units in the CPU.
696  unsigned getMaxInterleaveFactor(unsigned VF) const;
697 
698  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
699  /// \p Args is an optional argument which holds the instruction operand
700  /// values so the TTI can analyze those values searching for special
701  /// cases/optimizations based on those values.
702  int getArithmeticInstrCost(
703  unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
704  OperandValueKind Opd2Info = OK_AnyValue,
705  OperandValueProperties Opd1PropInfo = OP_None,
706  OperandValueProperties Opd2PropInfo = OP_None,
707  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
708 
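  // Illustrative sketch (VecTy and TTI are assumed names): the operand kind
  // and property hints let a target price, e.g., a multiply by a uniform
  // power-of-two constant more cheaply than a general multiply:
  //
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Mul, VecTy, TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue,
  //       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);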
709  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
710  /// The index and subtype parameters are used by the subvector insertion and
711  /// extraction shuffle kinds.
712  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
713  Type *SubTp = nullptr) const;
714 
715  /// \return The expected cost of cast instructions, such as bitcast, trunc,
716  /// zext, etc. If there is an existing instruction that holds Opcode, it
717  /// may be passed in the 'I' parameter.
718  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
719  const Instruction *I = nullptr) const;
720 
721  /// \return The expected cost of a sign- or zero-extended vector extract. Use
722  /// -1 to indicate that there is no information about the index value.
723  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
724  unsigned Index = -1) const;
725 
726  /// \return The expected cost of control-flow related instructions such as
727  /// Phi, Ret, Br.
728  int getCFInstrCost(unsigned Opcode) const;
729 
730  /// \returns The expected cost of compare and select instructions. If there
731  /// is an existing instruction that holds Opcode, it may be passed in the
732  /// 'I' parameter.
733  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
734  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
735 
736  /// \return The expected cost of vector Insert and Extract.
737  /// Use -1 to indicate that there is no information on the index value.
738  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
739 
740  /// \return The cost of Load and Store instructions.
741  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
742  unsigned AddressSpace, const Instruction *I = nullptr) const;
743 
744  /// \return The cost of masked Load and Store instructions.
745  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
746  unsigned AddressSpace) const;
747 
748  /// \return The cost of Gather or Scatter operation
749  /// \p Opcode - is a type of memory access Load or Store
750  /// \p DataTy - a vector type of the data to be loaded or stored
751  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
752  /// \p VariableMask - true when the memory access is predicated with a mask
753  /// that is not a compile-time constant
754  /// \p Alignment - alignment of single element
755  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
756  bool VariableMask, unsigned Alignment) const;
757 
758  /// \return The cost of the interleaved memory operation.
759  /// \p Opcode is the memory operation code
760  /// \p VecTy is the vector type of the interleaved access.
761  /// \p Factor is the interleave factor
762  /// \p Indices is the indices for interleaved load members (as interleaved
763  /// load allows gaps)
764  /// \p Alignment is the alignment of the memory operation
765  /// \p AddressSpace is address space of the pointer.
766  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
767  ArrayRef<unsigned> Indices, unsigned Alignment,
768  unsigned AddressSpace) const;
769 
770  /// \brief Calculate the cost of performing a vector reduction.
771  ///
772  /// This is the cost of reducing the vector value of type \p Ty to a scalar
773  /// value using the operation denoted by \p Opcode. The form of the reduction
774  /// can either be a pairwise reduction or a reduction that splits the vector
775  /// at every reduction level.
776  ///
777  /// Pairwise:
778  /// (v0, v1, v2, v3)
779  /// ((v0+v1), (v2+v3), undef, undef)
780  /// Split:
781  /// (v0, v1, v2, v3)
782  /// ((v0+v2), (v1+v3), undef, undef)
783  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
784  bool IsPairwiseForm) const;
785  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
786  bool IsUnsigned) const;
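  // Illustrative sketch (VecTy and TTI are assumed names): a vectorizer can
  // compare the two reduction shapes described above and pick the cheaper one:
  //
  //   int Pairwise = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
  //                                                 /*IsPairwiseForm=*/true);
  //   int Split = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
  //                                              /*IsPairwiseForm=*/false);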
787 
788  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
789  /// Three cases are handled: 1. scalar instruction 2. vector instruction
790  /// 3. scalar instruction which is to be vectorized with VF.
791  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
792  ArrayRef<Value *> Args, FastMathFlags FMF,
793  unsigned VF = 1) const;
794 
795  /// \returns The cost of Intrinsic instructions. Types analysis only.
796  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
797  /// arguments and the return value will be computed based on types.
798  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
799  ArrayRef<Type *> Tys, FastMathFlags FMF,
800  unsigned ScalarizationCostPassed = UINT_MAX) const;
801 
802  /// \returns The cost of Call instructions.
803  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
804 
805  /// \returns The number of pieces into which the provided type must be
806  /// split during legalization. Zero is returned when the answer is unknown.
807  unsigned getNumberOfParts(Type *Tp) const;
808 
809  /// \returns The cost of the address computation. For most targets this can be
810  /// merged into the instruction indexing mode. Some targets might want to
811  /// distinguish between address computation for memory operations on vector
812  /// types and scalar types. Such targets should override this function.
813  /// The 'SE' parameter holds a pointer to the scalar evolution object, which
814  /// is used to get the step value of 'Ptr' in the case of a constant stride.
815  /// The 'Ptr' parameter holds the SCEV of the access pointer.
816  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
817  const SCEV *Ptr = nullptr) const;
818 
819  /// \returns The cost, if any, of keeping values of the given types alive
820  /// over a callsite.
821  ///
822  /// Some types may require the use of register classes that do not have
823  /// any callee-saved registers, so would require a spill and fill.
824  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
825 
826  /// \returns True if the intrinsic is a supported memory intrinsic. Info
827  /// will contain additional information - whether the intrinsic may write
828  /// to or read from memory, its volatility, and the pointer. Info is undefined
829  /// if false is returned.
830  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
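  // Illustrative sketch (II and TTI are assumed names): treating a recognized
  // target intrinsic like an ordinary, unordered load:
  //
  //   MemIntrinsicInfo Info;
  //   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem &&
  //       !Info.WriteMem && Info.isUnordered()) {
  //     Value *Ptr = Info.PtrVal; // may be analyzed like a load address
  //   }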
831 
832  /// \returns The maximum element size, in bytes, for an element
833  /// unordered-atomic memory intrinsic.
834  unsigned getAtomicMemIntrinsicMaxElementSize() const;
835 
836  /// \returns A value which is the result of the given memory intrinsic. New
837  /// instructions may be created to extract the result from the given intrinsic
838  /// memory operation. Returns nullptr if the target cannot create a result
839  /// from the given intrinsic.
840  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
841  Type *ExpectedType) const;
842 
843  /// \returns The type to use in a loop expansion of a memcpy call.
844  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
845  unsigned SrcAlign, unsigned DestAlign) const;
846 
847  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
848  /// \param RemainingBytes The number of bytes to copy.
849  ///
850  /// Calculates the operand types to use when copying \p RemainingBytes of
851  /// memory, where source and destination alignments are \p SrcAlign and
852  /// \p DestAlign respectively.
853  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
854  LLVMContext &Context,
855  unsigned RemainingBytes,
856  unsigned SrcAlign,
857  unsigned DestAlign) const;
858 
859  /// \returns True if we want to test the new memcpy lowering functionality in
860  /// Transform/Utils.
861  /// Temporary. Will be removed once we move to the new functionality and
862  /// remove the old.
863  bool useWideIRMemcpyLoopLowering() const;
864 
865  /// \returns True if the two functions have compatible attributes for inlining
866  /// purposes.
867  bool areInlineCompatible(const Function *Caller,
868  const Function *Callee) const;
869 
870  /// \returns The bitwidth of the largest vector type that should be used to
871  /// load/store in the given address space.
872  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
873 
874  /// \returns True if the load instruction is legal to vectorize.
875  bool isLegalToVectorizeLoad(LoadInst *LI) const;
876 
877  /// \returns True if the store instruction is legal to vectorize.
878  bool isLegalToVectorizeStore(StoreInst *SI) const;
879 
880  /// \returns True if it is legal to vectorize the given load chain.
881  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
882  unsigned Alignment,
883  unsigned AddrSpace) const;
884 
885  /// \returns True if it is legal to vectorize the given store chain.
886  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
887  unsigned Alignment,
888  unsigned AddrSpace) const;
889 
890  /// \returns The new vector factor value if the target doesn't support \p
891  /// SizeInBytes loads or has a better vector factor.
892  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
893  unsigned ChainSizeInBytes,
894  VectorType *VecTy) const;
895 
896  /// \returns The new vector factor value if the target doesn't support \p
897  /// SizeInBytes stores or has a better vector factor.
898  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
899  unsigned ChainSizeInBytes,
900  VectorType *VecTy) const;
901 
902  /// Flags describing the kind of vector reduction.
903  struct ReductionFlags {
904  ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
905  bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
906  bool IsSigned; ///< Whether the operation is a signed int reduction.
907  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
908  };
909 
910  /// \returns True if the target wants to handle the given reduction idiom in
911  /// the intrinsics form instead of the shuffle form.
912  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
913  ReductionFlags Flags) const;
914 
915  /// \returns True if the target wants to expand the given reduction intrinsic
916  /// into a shuffle sequence.
917  bool shouldExpandReduction(const IntrinsicInst *II) const;
918  /// @}
919 
920 private:
921  /// \brief Estimate the latency of the specified instruction.
922  /// Returns 1 as the default value.
923  int getInstructionLatency(const Instruction *I) const;
924 
925  /// \brief Returns the expected throughput cost of the instruction.
926  /// Returns -1 if the cost is unknown.
927  int getInstructionThroughput(const Instruction *I) const;
928 
929  /// \brief The abstract base class used to type erase specific TTI
930  /// implementations.
931  class Concept;
932 
933  /// \brief The template model for the base class which wraps a concrete
934  /// implementation in a type erased interface.
935  template <typename T> class Model;
936 
937  std::unique_ptr<Concept> TTIImpl;
938 };
939 
940 class TargetTransformInfo::Concept {
941 public:
942  virtual ~Concept() = 0;
943  virtual const DataLayout &getDataLayout() const = 0;
944  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
945  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
946  ArrayRef<const Value *> Operands) = 0;
947  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
948  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
949  virtual int getCallCost(const Function *F, int NumArgs) = 0;
950  virtual int getCallCost(const Function *F,
951  ArrayRef<const Value *> Arguments) = 0;
952  virtual unsigned getInliningThresholdMultiplier() = 0;
953  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
954  ArrayRef<Type *> ParamTys) = 0;
955  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
956  ArrayRef<const Value *> Arguments) = 0;
957  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
958  unsigned &JTSize) = 0;
959  virtual int
960  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
961  virtual bool hasBranchDivergence() = 0;
962  virtual bool isSourceOfDivergence(const Value *V) = 0;
963  virtual bool isAlwaysUniform(const Value *V) = 0;
964  virtual unsigned getFlatAddressSpace() = 0;
965  virtual bool isLoweredToCall(const Function *F) = 0;
966  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
967  UnrollingPreferences &UP) = 0;
968  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
969  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
970  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
971  int64_t BaseOffset, bool HasBaseReg,
972  int64_t Scale,
973  unsigned AddrSpace,
974  Instruction *I) = 0;
975  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
976  TargetTransformInfo::LSRCost &C2) = 0;
977  virtual bool isLegalMaskedStore(Type *DataType) = 0;
978  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
979  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
980  virtual bool isLegalMaskedGather(Type *DataType) = 0;
981  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
982  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
983  virtual bool prefersVectorizedAddressing() = 0;
984  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
985  int64_t BaseOffset, bool HasBaseReg,
986  int64_t Scale, unsigned AddrSpace) = 0;
987  virtual bool LSRWithInstrQueries() = 0;
988  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
989  virtual bool isProfitableToHoist(Instruction *I) = 0;
990  virtual bool isTypeLegal(Type *Ty) = 0;
991  virtual unsigned getJumpBufAlignment() = 0;
992  virtual unsigned getJumpBufSize() = 0;
993  virtual bool shouldBuildLookupTables() = 0;
994  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
995  virtual unsigned
996  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
997  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
998  unsigned VF) = 0;
999  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1000  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1001  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1002  bool IsZeroCmp) const = 0;
1003  virtual bool enableInterleavedAccessVectorization() = 0;
1004  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1005  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1006  unsigned BitWidth,
1007  unsigned AddressSpace,
1008  unsigned Alignment,
1009  bool *Fast) = 0;
1010  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1011  virtual bool haveFastSqrt(Type *Ty) = 0;
1012  virtual int getFPOpCost(Type *Ty) = 0;
1013  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1014  Type *Ty) = 0;
1015  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1016  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1017  Type *Ty) = 0;
1018  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1019  Type *Ty) = 0;
1020  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1021  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1022  virtual unsigned getMinVectorRegisterBitWidth() = 0;
1023  virtual bool shouldConsiderAddressTypePromotion(
1024  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1025  virtual unsigned getCacheLineSize() = 0;
1026  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1027  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1028  virtual unsigned getPrefetchDistance() = 0;
1029  virtual unsigned getMinPrefetchStride() = 0;
1030  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1031  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1032  virtual unsigned
1033  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1034  OperandValueKind Opd2Info,
1035  OperandValueProperties Opd1PropInfo,
1036  OperandValueProperties Opd2PropInfo,
1037  ArrayRef<const Value *> Args) = 0;
1038  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1039  Type *SubTp) = 0;
1040  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1041  const Instruction *I) = 0;
1042  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1043  VectorType *VecTy, unsigned Index) = 0;
1044  virtual int getCFInstrCost(unsigned Opcode) = 0;
1045  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1046  Type *CondTy, const Instruction *I) = 0;
1047  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1048  unsigned Index) = 0;
1049  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1050  unsigned AddressSpace, const Instruction *I) = 0;
1051  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1052  unsigned Alignment,
1053  unsigned AddressSpace) = 0;
1054  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1055  Value *Ptr, bool VariableMask,
1056  unsigned Alignment) = 0;
1057  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1058  unsigned Factor,
1059  ArrayRef<unsigned> Indices,
1060  unsigned Alignment,
1061  unsigned AddressSpace) = 0;
1062  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1063  bool IsPairwiseForm) = 0;
1064  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1065  bool IsPairwiseForm, bool IsUnsigned) = 0;
1066  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1067  ArrayRef<Type *> Tys, FastMathFlags FMF,
1068  unsigned ScalarizationCostPassed) = 0;
1069  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1070  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1071  virtual int getCallInstrCost(Function *F, Type *RetTy,
1072  ArrayRef<Type *> Tys) = 0;
1073  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1074  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1075  const SCEV *Ptr) = 0;
1076  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1077  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1078  MemIntrinsicInfo &Info) = 0;
1079  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1080  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1081  Type *ExpectedType) = 0;
1082  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1083  unsigned SrcAlign,
1084  unsigned DestAlign) const = 0;
1085  virtual void getMemcpyLoopResidualLoweringType(
1086  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1087  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1088  virtual bool areInlineCompatible(const Function *Caller,
1089  const Function *Callee) const = 0;
1090  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1091  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1092  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1093  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1094  unsigned Alignment,
1095  unsigned AddrSpace) const = 0;
1096  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1097  unsigned Alignment,
1098  unsigned AddrSpace) const = 0;
1099  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1100  unsigned ChainSizeInBytes,
1101  VectorType *VecTy) const = 0;
1102  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1103  unsigned ChainSizeInBytes,
1104  VectorType *VecTy) const = 0;
1105  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1106  ReductionFlags) const = 0;
1107  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1108  virtual int getInstructionLatency(const Instruction *I) = 0;
1109 };
1110 
1111 template <typename T>
1112 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1113  T Impl;
1114 
1115 public:
1116  Model(T Impl) : Impl(std::move(Impl)) {}
1117  ~Model() override {}
1118 
1119  const DataLayout &getDataLayout() const override {
1120  return Impl.getDataLayout();
1121  }
1122 
1123  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1124  return Impl.getOperationCost(Opcode, Ty, OpTy);
1125  }
1126  int getGEPCost(Type *PointeeType, const Value *Ptr,
1127  ArrayRef<const Value *> Operands) override {
1128  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1129  }
1130  int getExtCost(const Instruction *I, const Value *Src) override {
1131  return Impl.getExtCost(I, Src);
1132  }
1133  int getCallCost(FunctionType *FTy, int NumArgs) override {
1134  return Impl.getCallCost(FTy, NumArgs);
1135  }
1136  int getCallCost(const Function *F, int NumArgs) override {
1137  return Impl.getCallCost(F, NumArgs);
1138  }
1139  int getCallCost(const Function *F,
1140  ArrayRef<const Value *> Arguments) override {
1141  return Impl.getCallCost(F, Arguments);
1142  }
1143  unsigned getInliningThresholdMultiplier() override {
1144  return Impl.getInliningThresholdMultiplier();
1145  }
1146  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1147  ArrayRef<Type *> ParamTys) override {
1148  return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
1149  }
1150  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1151  ArrayRef<const Value *> Arguments) override {
1152  return Impl.getIntrinsicCost(IID, RetTy, Arguments);
1153  }
1154  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1155  return Impl.getUserCost(U, Operands);
1156  }
1157  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1158  bool isSourceOfDivergence(const Value *V) override {
1159  return Impl.isSourceOfDivergence(V);
1160  }
1161 
1162  bool isAlwaysUniform(const Value *V) override {
1163  return Impl.isAlwaysUniform(V);
1164  }
1165 
1166  unsigned getFlatAddressSpace() override {
1167  return Impl.getFlatAddressSpace();
1168  }
1169 
1170  bool isLoweredToCall(const Function *F) override {
1171  return Impl.isLoweredToCall(F);
1172  }
1173  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1174  UnrollingPreferences &UP) override {
1175  return Impl.getUnrollingPreferences(L, SE, UP);
1176  }
1177  bool isLegalAddImmediate(int64_t Imm) override {
1178  return Impl.isLegalAddImmediate(Imm);
1179  }
1180  bool isLegalICmpImmediate(int64_t Imm) override {
1181  return Impl.isLegalICmpImmediate(Imm);
1182  }
1183  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1184  bool HasBaseReg, int64_t Scale,
1185  unsigned AddrSpace,
1186  Instruction *I) override {
1187  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1188  Scale, AddrSpace, I);
1189  }
1190  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1191  TargetTransformInfo::LSRCost &C2) override {
1192  return Impl.isLSRCostLess(C1, C2);
1193  }
1194  bool isLegalMaskedStore(Type *DataType) override {
1195  return Impl.isLegalMaskedStore(DataType);
1196  }
1197  bool isLegalMaskedLoad(Type *DataType) override {
1198  return Impl.isLegalMaskedLoad(DataType);
1199  }
1200  bool isLegalMaskedScatter(Type *DataType) override {
1201  return Impl.isLegalMaskedScatter(DataType);
1202  }
1203  bool isLegalMaskedGather(Type *DataType) override {
1204  return Impl.isLegalMaskedGather(DataType);
1205  }
1206  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1207  return Impl.hasDivRemOp(DataType, IsSigned);
1208  }
1209  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1210  return Impl.hasVolatileVariant(I, AddrSpace);
1211  }
1212  bool prefersVectorizedAddressing() override {
1213  return Impl.prefersVectorizedAddressing();
1214  }
1215  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1216  bool HasBaseReg, int64_t Scale,
1217  unsigned AddrSpace) override {
1218  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1219  Scale, AddrSpace);
1220  }
1221  bool LSRWithInstrQueries() override {
1222  return Impl.LSRWithInstrQueries();
1223  }
1224  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1225  return Impl.isTruncateFree(Ty1, Ty2);
1226  }
1227  bool isProfitableToHoist(Instruction *I) override {
1228  return Impl.isProfitableToHoist(I);
1229  }
1230  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1231  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1232  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1233  bool shouldBuildLookupTables() override {
1234  return Impl.shouldBuildLookupTables();
1235  }
1236  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1237  return Impl.shouldBuildLookupTablesForConstant(C);
1238  }
1239  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1240  bool Extract) override {
1241  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1242  }
1243  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1244  unsigned VF) override {
1245  return Impl.getOperandsScalarizationOverhead(Args, VF);
1246  }
1247 
1248  bool supportsEfficientVectorElementLoadStore() override {
1249  return Impl.supportsEfficientVectorElementLoadStore();
1250  }
1251 
1252  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1253  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1254  }
1255  const MemCmpExpansionOptions *enableMemCmpExpansion(
1256  bool IsZeroCmp) const override {
1257  return Impl.enableMemCmpExpansion(IsZeroCmp);
1258  }
1259  bool enableInterleavedAccessVectorization() override {
1260  return Impl.enableInterleavedAccessVectorization();
1261  }
1262  bool isFPVectorizationPotentiallyUnsafe() override {
1263  return Impl.isFPVectorizationPotentiallyUnsafe();
1264  }
1265  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1266  unsigned BitWidth, unsigned AddressSpace,
1267  unsigned Alignment, bool *Fast) override {
1268  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1269  Alignment, Fast);
1270  }
1271  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1272  return Impl.getPopcntSupport(IntTyWidthInBit);
1273  }
1274  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1275 
1276  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1277 
1278  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1279  Type *Ty) override {
1280  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1281  }
1282  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1283  return Impl.getIntImmCost(Imm, Ty);
1284  }
1285  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1286  Type *Ty) override {
1287  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1288  }
1289  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1290  Type *Ty) override {
1291  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1292  }
1293  unsigned getNumberOfRegisters(bool Vector) override {
1294  return Impl.getNumberOfRegisters(Vector);
1295  }
1296  unsigned getRegisterBitWidth(bool Vector) const override {
1297  return Impl.getRegisterBitWidth(Vector);
1298  }
1299  unsigned getMinVectorRegisterBitWidth() override {
1300  return Impl.getMinVectorRegisterBitWidth();
1301  }
1302  bool shouldConsiderAddressTypePromotion(
1303  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1304  return Impl.shouldConsiderAddressTypePromotion(
1305  I, AllowPromotionWithoutCommonHeader);
1306  }
1307  unsigned getCacheLineSize() override {
1308  return Impl.getCacheLineSize();
1309  }
1310  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1311  return Impl.getCacheSize(Level);
1312  }
1313  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1314  return Impl.getCacheAssociativity(Level);
1315  }
1316  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1317  unsigned getMinPrefetchStride() override {
1318  return Impl.getMinPrefetchStride();
1319  }
1320  unsigned getMaxPrefetchIterationsAhead() override {
1321  return Impl.getMaxPrefetchIterationsAhead();
1322  }
1323  unsigned getMaxInterleaveFactor(unsigned VF) override {
1324  return Impl.getMaxInterleaveFactor(VF);
1325  }
1326  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1327  unsigned &JTSize) override {
1328  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1329  }
1330  unsigned
1331  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1332  OperandValueKind Opd2Info,
1333  OperandValueProperties Opd1PropInfo,
1334  OperandValueProperties Opd2PropInfo,
1335  ArrayRef<const Value *> Args) override {
1336  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1337  Opd1PropInfo, Opd2PropInfo, Args);
1338  }
1339  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1340  Type *SubTp) override {
1341  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1342  }
1343  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1344  const Instruction *I) override {
1345  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1346  }
1347  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1348  unsigned Index) override {
1349  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1350  }
1351  int getCFInstrCost(unsigned Opcode) override {
1352  return Impl.getCFInstrCost(Opcode);
1353  }
1354  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1355  const Instruction *I) override {
1356  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1357  }
1358  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1359  return Impl.getVectorInstrCost(Opcode, Val, Index);
1360  }
1361  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1362  unsigned AddressSpace, const Instruction *I) override {
1363  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1364  }
1365  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1366  unsigned AddressSpace) override {
1367  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1368  }
1369  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1370  Value *Ptr, bool VariableMask,
1371  unsigned Alignment) override {
1372  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1373  Alignment);
1374  }
1375  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1376  ArrayRef<unsigned> Indices, unsigned Alignment,
1377  unsigned AddressSpace) override {
1378  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1379  Alignment, AddressSpace);
1380  }
1381  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1382  bool IsPairwiseForm) override {
1383  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1384  }
1385  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1386  bool IsPairwiseForm, bool IsUnsigned) override {
1387  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1388  }
1389  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1390  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1391  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1392  ScalarizationCostPassed);
1393  }
1394  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1395  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1396  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1397  }
1398  int getCallInstrCost(Function *F, Type *RetTy,
1399  ArrayRef<Type *> Tys) override {
1400  return Impl.getCallInstrCost(F, RetTy, Tys);
1401  }
1402  unsigned getNumberOfParts(Type *Tp) override {
1403  return Impl.getNumberOfParts(Tp);
1404  }
1405  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1406  const SCEV *Ptr) override {
1407  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1408  }
1409  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1410  return Impl.getCostOfKeepingLiveOverCall(Tys);
1411  }
1412  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1413  MemIntrinsicInfo &Info) override {
1414  return Impl.getTgtMemIntrinsic(Inst, Info);
1415  }
1416  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1417  return Impl.getAtomicMemIntrinsicMaxElementSize();
1418  }
1419  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1420  Type *ExpectedType) override {
1421  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1422  }
1423  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1424  unsigned SrcAlign,
1425  unsigned DestAlign) const override {
1426  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1427  }
1428  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1429  LLVMContext &Context,
1430  unsigned RemainingBytes,
1431  unsigned SrcAlign,
1432  unsigned DestAlign) const override {
1433  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1434  SrcAlign, DestAlign);
1435  }
1436  bool areInlineCompatible(const Function *Caller,
1437  const Function *Callee) const override {
1438  return Impl.areInlineCompatible(Caller, Callee);
1439  }
1440  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1441  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1442  }
1443  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1444  return Impl.isLegalToVectorizeLoad(LI);
1445  }
1446  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1447  return Impl.isLegalToVectorizeStore(SI);
1448  }
1449  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1450  unsigned Alignment,
1451  unsigned AddrSpace) const override {
1452  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1453  AddrSpace);
1454  }
1455  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1456  unsigned Alignment,
1457  unsigned AddrSpace) const override {
1458  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1459  AddrSpace);
1460  }
1461  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1462  unsigned ChainSizeInBytes,
1463  VectorType *VecTy) const override {
1464  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1465  }
1466  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1467  unsigned ChainSizeInBytes,
1468  VectorType *VecTy) const override {
1469  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1470  }
1471  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1472  ReductionFlags Flags) const override {
1473  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1474  }
1475  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1476  return Impl.shouldExpandReduction(II);
1477  }
1478  int getInstructionLatency(const Instruction *I) override {
1479  return Impl.getInstructionLatency(I);
1480  }
1481 };
1482 
1483 template <typename T>
1484 TargetTransformInfo::TargetTransformInfo(T Impl)
1485  : TTIImpl(new Model<T>(Impl)) {}
1486 
1487 /// \brief Analysis pass providing the \c TargetTransformInfo.
1488 ///
1489 /// The core idea of the TargetIRAnalysis is to expose an interface through
1490 /// which LLVM targets can analyze and provide information about the middle
1491 /// end's target-independent IR. This supports use cases such as target-aware
1492 /// cost modeling of IR constructs.
1493 ///
1494 /// This is a function analysis because much of the cost modeling for targets
1495 /// is done in a subtarget-specific way and LLVM supports compiling different
1496 /// functions targeting different subtargets in order to support runtime
1497 /// dispatch according to the observed subtarget.
1498 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1499 public:
1500  typedef TargetTransformInfo Result;
1501 
1502  /// \brief Default construct a target IR analysis.
1503  ///
1504  /// This will use the module's datalayout to construct a baseline
1505  /// conservative TTI result.
1506  TargetIRAnalysis();
1507 
1508  /// \brief Construct an IR analysis pass around a target-provided callback.
1509  ///
1510  /// The callback will be called with a particular function for which the TTI
1511  /// is needed and must return a TTI object for that function.
1512  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1513 
1514  // Value semantics. We spell out the constructors for MSVC.
1515  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1516  : TTICallback(Arg.TTICallback) {}
1517  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1518  : TTICallback(std::move(Arg.TTICallback)) {}
1519  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1520  TTICallback = RHS.TTICallback;
1521  return *this;
1522  }
1523  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1524  TTICallback = std::move(RHS.TTICallback);
1525  return *this;
1526  }
1527 
1528  Result run(const Function &F, FunctionAnalysisManager &);
1529 
1530 private:
1531  friend AnalysisInfoMixin<TargetIRAnalysis>;
1532  static AnalysisKey Key;
1533 
1534  /// \brief The callback used to produce a result.
1535  ///
1536  /// We use a completely opaque callback so that targets can provide whatever
1537  /// mechanism they desire for constructing the TTI for a given function.
1538  ///
1539  /// FIXME: Should we really use std::function? It's relatively inefficient.
1540  /// It might be possible to arrange for even stateful callbacks to outlive
1541  /// the analysis and thus use a function_ref which would be lighter weight.
1542  /// This may also be less error prone as the callback is likely to reference
1543  /// the external TargetMachine, and that reference needs to never dangle.
1544  std::function<Result(const Function &)> TTICallback;
1545 
1546  /// \brief Helper function used as the callback in the default constructor.
1547  static Result getDefaultTTI(const Function &F);
1548 };
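
Usage note (illustrative sketch, not part of this header): one way to build a TargetIRAnalysis around a custom callback and query it through a FunctionAnalysisManager. The names computeBaselineTTI and queryTarget are hypothetical, and the callback returns only the conservative DataLayout-based TTI rather than a real target's implementation.

 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 using namespace llvm;

 // Hypothetical callback: produce a conservative, DataLayout-only TTI result.
 static TargetTransformInfo computeBaselineTTI(const Function &F) {
   return TargetTransformInfo(F.getParent()->getDataLayout());
 }

 // Hypothetical driver: register the analysis and ask the target a question.
 static void queryTarget(Function &F, FunctionAnalysisManager &FAM) {
   FAM.registerPass([] { return TargetIRAnalysis(computeBaselineTTI); });
   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
   (void)TTI.getRegisterBitWidth(/*Vector=*/true); // e.g. vector register width
 }
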
1549 
1550 /// \brief Wrapper pass for TargetTransformInfo.
1551 ///
1552 /// This pass can be constructed from a TTI object which it stores internally
1553 /// and is queried by passes.
1554 class TargetTransformInfoWrapperPass : public ImmutablePass {
1555  TargetIRAnalysis TIRA;
1556  Optional<TargetTransformInfo> TTI;
1557 
1558  virtual void anchor();
1559 
1560 public:
1561  static char ID;
1562 
1563  /// \brief We must provide a default constructor for the pass but it should
1564  /// never be used.
1565  ///
1566  /// Use the constructor below or call one of the creation routines.
1567  TargetTransformInfoWrapperPass();
1568 
1569  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1570 
1571  TargetTransformInfo &getTTI(const Function &F);
1572 };
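
Usage note (illustrative sketch, not part of this header): in the legacy pass manager, a pass typically reaches TTI through this wrapper. The FunctionPass below, CountRegsPass, is a hypothetical example that only queries the target and changes no IR.

 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Pass.h"
 using namespace llvm;

 namespace {
 // Hypothetical legacy function pass that consults the target cost model.
 struct CountRegsPass : public FunctionPass {
   static char ID;
   CountRegsPass() : FunctionPass(ID) {}

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.setPreservesAll();
   }

   bool runOnFunction(Function &F) override {
     const TargetTransformInfo &TTI =
         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
     (void)TTI.getNumberOfRegisters(/*Vector=*/false); // scalar register count
     return false; // no IR was modified
   }
 };
 } // end anonymous namespace

 char CountRegsPass::ID = 0;
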
1573 
1574 /// \brief Create an analysis pass wrapper around a TTI object.
1575 ///
1576 /// This analysis pass just holds the TTI instance and makes it available to
1577 /// clients.
1578 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1579 
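
Usage note (illustrative sketch, not part of this header): a typical way to seed a legacy pipeline with target information, assuming a TargetMachine is already in hand. The helper name addTargetInfo is hypothetical.

 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;

 // Hypothetical helper: make target cost queries available to every pass in PM.
 static void addTargetInfo(legacy::PassManager &PM, TargetMachine &TM) {
   PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
 }
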
1580 } // End llvm namespace
1581 
1582 #endif