//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
enum ID : unsigned;
}

class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class Loop;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class Type;
class User;
class Value;

/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};
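
// Example (illustrative sketch, not part of this interface): how a target's
// TTI implementation might fill in a MemIntrinsicInfo from its
// getTgtMemIntrinsic() hook. The class name `MyTTIImpl` and the intrinsic
// used in the switch are hypothetical.
//
//   bool MyTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
//                                      MemIntrinsicInfo &Info) {
//     switch (Inst->getIntrinsicID()) {
//     case Intrinsic::my_target_load_special: // hypothetical intrinsic
//       Info.PtrVal = Inst->getArgOperand(0); // behaves like a load of this
//       Info.ReadMem = true;                  // pointer
//       Info.WriteMem = false;
//       Info.IsVolatile = false;
//       return true;
//     default:
//       return false;
//     }
//   }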

/// \brief This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// \brief Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// \brief Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// \brief Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// \brief The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of an instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// \brief Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note that this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }
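
  // Example (illustrative only): a client pass comparing an instruction's
  // size cost against the TCC_* constants defined below. The function and
  // variable names here are hypothetical.
  //
  //   bool isCheapToDuplicate(const Instruction &I,
  //                           const TargetTransformInfo &TTI) {
  //     int Cost =
  //         TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
  //     return Cost <= TargetTransformInfo::TCC_Basic;
  //   }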

  /// \brief Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// \brief Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// \brief Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// \brief Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// \brief Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \return The estimated number of case clusters when lowering \p SI.
  /// \p JTSize is set to a jump table size only when \p SI is suitable for a
  /// jump table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) const;

  /// \brief Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must equal
  /// the number of current operands the IR user has. Their order on the list
  /// must be the same as the order of the current operands the IR user has.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// \brief This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }
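
  // Example (illustrative only): estimating what a GEP would cost if one of
  // its operands were replaced, e.g. by a constant discovered during another
  // transformation. `GEP`, `IdxTy` and `TTI` are names the caller is assumed
  // to already have.
  //
  //   SmallVector<const Value *, 4> NewOperands(GEP->value_op_begin(),
  //                                             GEP->value_op_end());
  //   NewOperands[1] = ConstantInt::get(IdxTy, 0); // pretend the index is 0
  //   if (TTI.getUserCost(GEP, NewOperands) == TargetTransformInfo::TCC_Free)
  //     ; // the GEP is expected to fold away after the replacement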

  /// \brief Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// \brief Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// \brief Returns true for the target-specific set of operations which
  /// produce a uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// \brief Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents the number of instructions optimized away when a "back edge"
    // becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations for loops with low dynamic trip count.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
  };

  /// \brief Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// \brief Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. A HW
  /// support is considered as "Fast" if it can outperform, or is on a par
  /// with, the SW implementation when the population is sparse; otherwise, it
  /// is considered as "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// \brief Return true if the specified immediate is a legal add immediate,
  /// that is, the target has add instructions which can add a register with
  /// the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// \brief Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// \brief Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
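
  // The queried addressing mode describes addresses of the form
  //   BaseGV + BaseReg + BaseOffset + Scale * IndexReg
  // where BaseReg is present iff HasBaseReg is true. Example (illustrative
  // only; `Ctx` is the caller's LLVMContext and the constants are made up):
  // asking whether `[reg + 4*reg + 16]` is legal for an i32 load in address
  // space 0:
  //
  //   bool Legal = TTI.isLegalAddressingMode(
  //       Type::getInt32Ty(Ctx), /*BaseGV=*/nullptr, /*BaseOffset=*/16,
  //       /*HasBaseReg=*/true, /*Scale=*/4);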

  /// \brief Return true if the LSR cost of C1 is lower than the cost of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// \brief Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// \brief Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and 64-bit
  /// scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// an addrspacecast to the generic AS for volatile loads/stores. The default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// \brief Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// \brief Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// \brief Return true if it is profitable to hoist an instruction in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// \brief Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// \brief Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// \brief Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// \brief Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// \brief Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// \brief Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it can
  /// return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// \brief Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;
  };
  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
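
  // Example (illustrative sketch): a 64-bit target that wants memcmp calls
  // expanded inline using 8-, 4-, 2- and 1-byte loads might implement the
  // hook roughly as follows. `MyTTIImpl` is a hypothetical target
  // implementation.
  //
  //   const TargetTransformInfo::MemCmpExpansionOptions *
  //   MyTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
  //     static TargetTransformInfo::MemCmpExpansionOptions Options;
  //     if (Options.LoadSizes.empty())
  //       Options.LoadSizes = {8, 4, 2, 1}; // decreasing order, in bytes
  //     return &Options;
  //   }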

  /// \brief Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// \brief Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because vector and scalar floating-point
  /// semantics may differ. For example, ARM NEON v7 SIMD math does not support
  /// IEEE-754 denormal numbers, while depending on the platform, scalar
  /// floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// \brief Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;

  /// \brief Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// \brief Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// \brief Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// \brief Return the expected cost for the given integer when optimising
  /// for size. This is different from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// \brief The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,       ///< Broadcast element 0 to all other elements.
    SK_Reverse,         ///< Reverse the order of the vector.
    SK_Alternate,       ///< Choose alternate elements from vector.
    SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector,///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,   ///< Merge elements from two source vectors into one
                        ///< with any shuffle mask.
    SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
                        ///< shuffle mask.
  };

  /// \brief Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// \brief Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  unsigned getMinimumVF(unsigned ElemWidth) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
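
  // Example (illustrative only): a loop transform choosing a blocking factor
  // from the L1 data cache size, falling back to a guess when the target does
  // not report one. `ElemSize` and the fallback value are hypothetical.
  //
  //   unsigned BlockElems = 1024; // fallback guess
  //   if (llvm::Optional<unsigned> L1 =
  //           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
  //     BlockElems = *L1 / ElemSize;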

  /// \return How much before a load we should place the prefetch instruction.
  /// This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride. This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead. If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// This is an approximation of reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;

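  // Example (illustrative only; `Ctx` is the caller's LLVMContext): a
  // vectorizer-style query for the throughput cost of a <4 x i32> multiply
  // whose second operand is known to be a uniform power-of-two constant (and
  // so may lower to a shift).
  //
  //   Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Mul, VecTy,
  //       TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue,
  //       TargetTransformInfo::OP_None,
  //       TargetTransformInfo::OP_PowerOf2);
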
  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                         Type *CondTy = nullptr,
                         const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace,
                      const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - is the type of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) const;

  /// \brief Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) const;
  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned) const;

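  // Example (illustrative only; `Ctx` is the caller's LLVMContext): choosing
  // the cheaper reduction form for an i32 add reduction over <8 x i32>.
  //
  //   Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 8);
  //   int Pairwise = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
  //                                                 /*IsPairwiseForm=*/true);
  //   int Split = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
  //                                              /*IsPairwiseForm=*/false);
  //   bool UsePairwise = Pairwise < Split;
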
  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  /// Three cases are handled: 1. scalar instruction 2. vector instruction
  /// 3. scalar instruction which is to be vectorized with VF.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1) const;

  /// \returns The cost of Intrinsic instructions. Types analysis only.
  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
  /// arguments and the return value will be computed based on types.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX) const;

  /// \returns The cost of Call instructions.
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can be
  /// merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the scalar evolution object which
  /// is used in order to get the Ptr step value in case of constant stride.
  /// The 'Ptr' parameter holds the SCEV of the access pointer.
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
                                const SCEV *Ptr = nullptr) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may write
  /// or read to memory, volatility and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  /// \returns The maximum element size, in bytes, for an element
  /// unordered-atomic memory intrinsic.
  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given intrinsic
  /// memory operation. Returns nullptr if the target cannot create a result
  /// from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns The type to use in a loop expansion of a memcpy call.
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  /// \param RemainingBytes The number of bytes to copy.
  ///
  /// Calculates the operand types to use when copying \p RemainingBytes of
  /// memory, where source and destination alignments are \p SrcAlign and
  /// \p DestAlign respectively.
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
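
  // Example (illustrative only): how a memcpy-lowering transform might use
  // these two hooks together. It copies with a loop of `LoopOpTy`-sized
  // accesses and then asks for the operand types that cover the tail bytes.
  // `Len`, `TailBytes`, `SrcAlign` and `DstAlign` are values assumed to be
  // available in the caller.
  //
  //   Type *LoopOpTy =
  //       TTI.getMemcpyLoopLoweringType(Ctx, Len, SrcAlign, DstAlign);
  //   SmallVector<Type *, 4> ResidualTys;
  //   TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
  //                                         /*RemainingBytes=*/TailBytes,
  //                                         SrcAlign, DstAlign);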
902 
903  /// \returns True if the two functions have compatible attributes for inlining
904  /// purposes.
905  bool areInlineCompatible(const Function *Caller,
906  const Function *Callee) const;
907 
908  /// \brief The type of load/store indexing.
910  MIM_Unindexed, ///< No indexing.
911  MIM_PreInc, ///< Pre-incrementing.
912  MIM_PreDec, ///< Pre-decrementing.
913  MIM_PostInc, ///< Post-incrementing.
914  MIM_PostDec ///< Post-decrementing.
915  };
916 
917  /// \returns True if the specified indexed load for the given type is legal.
918  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
919 
920  /// \returns True if the specified indexed store for the given type is legal.
921  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
922 
923  /// \returns The bitwidth of the largest vector type that should be used to
924  /// load/store in the given address space.
925  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
926 
927  /// \returns True if the load instruction is legal to vectorize.
928  bool isLegalToVectorizeLoad(LoadInst *LI) const;
929 
930  /// \returns True if the store instruction is legal to vectorize.
931  bool isLegalToVectorizeStore(StoreInst *SI) const;
932 
933  /// \returns True if it is legal to vectorize the given load chain.
934  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
935  unsigned Alignment,
936  unsigned AddrSpace) const;
937 
938  /// \returns True if it is legal to vectorize the given store chain.
939  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
940  unsigned Alignment,
941  unsigned AddrSpace) const;
942 
943  /// \returns The new vector factor value if the target doesn't support \p
944  /// SizeInBytes loads or has a better vector factor.
945  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
946  unsigned ChainSizeInBytes,
947  VectorType *VecTy) const;
948 
949  /// \returns The new vector factor value if the target doesn't support \p
950  /// SizeInBytes stores or has a better vector factor.
951  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
952  unsigned ChainSizeInBytes,
953  VectorType *VecTy) const;
954 
955  /// Flags describing the kind of vector reduction.
956  struct ReductionFlags {
957  ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
958  bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation.
959  bool IsSigned; ///< Whether the operation is a signed int reduction.
960  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
961  };
962 
963  /// \returns True if the target wants to handle the given reduction idiom in
964  /// the intrinsics form instead of the shuffle form.
965  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
966  ReductionFlags Flags) const;
967 
968  /// \returns True if the target wants to expand the given reduction intrinsic
969  /// into a shuffle sequence.
970  bool shouldExpandReduction(const IntrinsicInst *II) const;
971  /// @}
972 
973 private:
974  /// \brief Estimate the latency of specified instruction.
975  /// Returns 1 as the default value.
976  int getInstructionLatency(const Instruction *I) const;
977 
978  /// \brief Returns the expected throughput cost of the instruction.
979  /// Returns -1 if the cost is unknown.
980  int getInstructionThroughput(const Instruction *I) const;
981 
982  /// \brief The abstract base class used to type erase specific TTI
983  /// implementations.
984  class Concept;
985 
986  /// \brief The template model for the base class which wraps a concrete
987  /// implementation in a type erased interface.
988  template <typename T> class Model;
989 
990  std::unique_ptr<Concept> TTIImpl;
991 };

class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
  virtual int getCallCost(const Function *F, int NumArgs) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                    unsigned &JTSize) = 0;
  virtual int
  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                             TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool canMacroFuseCmp() = 0;
  virtual bool shouldFavorPostInc() const = 0;
  virtual bool isLegalMaskedStore(Type *DataType) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool LSRWithInstrQueries() = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool useAA() = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual unsigned getJumpBufAlignment() = 0;
  virtual unsigned getJumpBufSize() = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool useColdCCForColdCall(Function &F) = 0;
  virtual unsigned
  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                    unsigned VF) = 0;
  virtual bool supportsEfficientVectorElementLoadStore() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                    Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
  virtual unsigned getMinVectorRegisterBitWidth() = 0;
  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() = 0;
  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
  virtual unsigned getPrefetchDistance() = 0;
  virtual unsigned getMinPrefetchStride() = 0;
  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo,
                         ArrayRef<const Value *> Args) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                               const Instruction *I) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy, const Instruction *I) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                              unsigned AddressSpace, const Instruction *I) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment,
                                    unsigned AddressSpace) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     Value *Ptr, bool VariableMask,
                                     unsigned Alignment) = 0;
  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                         unsigned Factor,
                                         ArrayRef<unsigned> Indices,
                                         unsigned Alignment,
                                         unsigned AddressSpace) = 0;
  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                         bool IsPairwiseForm) = 0;
  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                     bool IsPairwiseForm, bool IsUnsigned) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Type *> Tys, FastMathFlags FMF,
                                    unsigned ScalarizationCostPassed) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Value *> Args, FastMathFlags FMF,
                                    unsigned VF) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy,
                               ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                        const SCEV *Ptr) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                          unsigned SrcAlign,
                                          unsigned DestAlign) const = 0;
  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           unsigned Alignment,
                                           unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            unsigned Alignment,
                                            unsigned AddrSpace) const = 0;
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const = 0;
  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                     ReductionFlags) const = 0;
  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
  virtual int getInstructionLatency(const Instruction *I) = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getExtCost(const Instruction *I, const Value *Src) override {
    return Impl.getExtCost(I, Src);
  }
  int getCallCost(FunctionType *FTy, int NumArgs) override {
    return Impl.getCallCost(FTy, NumArgs);
  }
  int getCallCost(const Function *F, int NumArgs) override {
    return Impl.getCallCost(F, NumArgs);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments) override {
    return Impl.getCallCost(F, Arguments);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
  }
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    return Impl.getUserCost(U, Operands);
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }

  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }

  unsigned getFlatAddressSpace() override {
    return Impl.getFlatAddressSpace();
  }

  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, SE, UP);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace, I);
  }
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool canMacroFuseCmp() override {
    return Impl.canMacroFuseCmp();
  }
  bool shouldFavorPostInc() const override {
    return Impl.shouldFavorPostInc();
  }
  bool isLegalMaskedStore(Type *DataType) override {
    return Impl.isLegalMaskedStore(DataType);
  }
  bool isLegalMaskedLoad(Type *DataType) override {
    return Impl.isLegalMaskedLoad(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType) override {
    return Impl.isLegalMaskedScatter(DataType);
  }
  bool isLegalMaskedGather(Type *DataType) override {
    return Impl.isLegalMaskedGather(DataType);
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool LSRWithInstrQueries() override {
    return Impl.LSRWithInstrQueries();
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1300  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1301  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1302  bool shouldBuildLookupTables() override {
1303  return Impl.shouldBuildLookupTables();
1304  }
1305  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1306  return Impl.shouldBuildLookupTablesForConstant(C);
1307  }
1308  bool useColdCCForColdCall(Function &F) override {
1309  return Impl.useColdCCForColdCall(F);
1310  }
1311 
1312  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1313  bool Extract) override {
1314  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1315  }
1316  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1317  unsigned VF) override {
1318  return Impl.getOperandsScalarizationOverhead(Args, VF);
1319  }
1320 
1321  bool supportsEfficientVectorElementLoadStore() override {
1322  return Impl.supportsEfficientVectorElementLoadStore();
1323  }
1324 
1325  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1326  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1327  }
1328  const MemCmpExpansionOptions *enableMemCmpExpansion(
1329  bool IsZeroCmp) const override {
1330  return Impl.enableMemCmpExpansion(IsZeroCmp);
1331  }
1332  bool enableInterleavedAccessVectorization() override {
1333  return Impl.enableInterleavedAccessVectorization();
1334  }
1335  bool isFPVectorizationPotentiallyUnsafe() override {
1336  return Impl.isFPVectorizationPotentiallyUnsafe();
1337  }
1338  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1339  unsigned BitWidth, unsigned AddressSpace,
1340  unsigned Alignment, bool *Fast) override {
1341  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1342  Alignment, Fast);
1343  }
1344  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1345  return Impl.getPopcntSupport(IntTyWidthInBit);
1346  }
1347  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1348 
1349  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1350  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1351  }
1352 
1353  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1354 
1355  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1356  Type *Ty) override {
1357  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1358  }
1359  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1360  return Impl.getIntImmCost(Imm, Ty);
1361  }
1362  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1363  Type *Ty) override {
1364  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1365  }
1366  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1367  Type *Ty) override {
1368  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1369  }
1370  unsigned getNumberOfRegisters(bool Vector) override {
1371  return Impl.getNumberOfRegisters(Vector);
1372  }
1373  unsigned getRegisterBitWidth(bool Vector) const override {
1374  return Impl.getRegisterBitWidth(Vector);
1375  }
1376  unsigned getMinVectorRegisterBitWidth() override {
1377  return Impl.getMinVectorRegisterBitWidth();
1378  }
1379  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1380  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1381  }
1382  unsigned getMinimumVF(unsigned ElemWidth) const override {
1383  return Impl.getMinimumVF(ElemWidth);
1384  }
1385  bool shouldConsiderAddressTypePromotion(
1386  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1387  return Impl.shouldConsiderAddressTypePromotion(
1388  I, AllowPromotionWithoutCommonHeader);
1389  }
1390  unsigned getCacheLineSize() override {
1391  return Impl.getCacheLineSize();
1392  }
1393  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1394  return Impl.getCacheSize(Level);
1395  }
1396  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1397  return Impl.getCacheAssociativity(Level);
1398  }
1399  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1400  unsigned getMinPrefetchStride() override {
1401  return Impl.getMinPrefetchStride();
1402  }
1403  unsigned getMaxPrefetchIterationsAhead() override {
1404  return Impl.getMaxPrefetchIterationsAhead();
1405  }
1406  unsigned getMaxInterleaveFactor(unsigned VF) override {
1407  return Impl.getMaxInterleaveFactor(VF);
1408  }
1409  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1410  unsigned &JTSize) override {
1411  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1412  }
1413  unsigned
1414  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1415  OperandValueKind Opd2Info,
1416  OperandValueProperties Opd1PropInfo,
1417  OperandValueProperties Opd2PropInfo,
1418  ArrayRef<const Value *> Args) override {
1419  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1420  Opd1PropInfo, Opd2PropInfo, Args);
1421  }
1422  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1423  Type *SubTp) override {
1424  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1425  }
1426  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1427  const Instruction *I) override {
1428  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1429  }
1430  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1431  unsigned Index) override {
1432  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1433  }
1434  int getCFInstrCost(unsigned Opcode) override {
1435  return Impl.getCFInstrCost(Opcode);
1436  }
1437  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1438  const Instruction *I) override {
1439  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1440  }
1441  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1442  return Impl.getVectorInstrCost(Opcode, Val, Index);
1443  }
1444  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1445  unsigned AddressSpace, const Instruction *I) override {
1446  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1447  }
1448  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1449  unsigned AddressSpace) override {
1450  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1451  }
1452  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1453  Value *Ptr, bool VariableMask,
1454  unsigned Alignment) override {
1455  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1456  Alignment);
1457  }
1458  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1459  ArrayRef<unsigned> Indices, unsigned Alignment,
1460  unsigned AddressSpace) override {
1461  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1462  Alignment, AddressSpace);
1463  }
1464  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1465  bool IsPairwiseForm) override {
1466  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1467  }
1468  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1469  bool IsPairwiseForm, bool IsUnsigned) override {
1470  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1471  }
1472  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1473  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1474  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1475  ScalarizationCostPassed);
1476  }
1477  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1478  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1479  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1480  }
1481  int getCallInstrCost(Function *F, Type *RetTy,
1482  ArrayRef<Type *> Tys) override {
1483  return Impl.getCallInstrCost(F, RetTy, Tys);
1484  }
1485  unsigned getNumberOfParts(Type *Tp) override {
1486  return Impl.getNumberOfParts(Tp);
1487  }
1488  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1489  const SCEV *Ptr) override {
1490  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1491  }
1492  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1493  return Impl.getCostOfKeepingLiveOverCall(Tys);
1494  }
1495  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1496  MemIntrinsicInfo &Info) override {
1497  return Impl.getTgtMemIntrinsic(Inst, Info);
1498  }
1499  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1500  return Impl.getAtomicMemIntrinsicMaxElementSize();
1501  }
1502  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1503  Type *ExpectedType) override {
1504  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1505  }
1506  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1507  unsigned SrcAlign,
1508  unsigned DestAlign) const override {
1509  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1510  }
1511  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1512  LLVMContext &Context,
1513  unsigned RemainingBytes,
1514  unsigned SrcAlign,
1515  unsigned DestAlign) const override {
1516  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1517  SrcAlign, DestAlign);
1518  }
1519  bool areInlineCompatible(const Function *Caller,
1520  const Function *Callee) const override {
1521  return Impl.areInlineCompatible(Caller, Callee);
1522  }
1523  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1524  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1525  }
1526  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1527  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1528  }
1529  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1530  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1531  }
1532  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1533  return Impl.isLegalToVectorizeLoad(LI);
1534  }
1535  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1536  return Impl.isLegalToVectorizeStore(SI);
1537  }
1538  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1539  unsigned Alignment,
1540  unsigned AddrSpace) const override {
1541  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1542  AddrSpace);
1543  }
1544  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1545  unsigned Alignment,
1546  unsigned AddrSpace) const override {
1547  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1548  AddrSpace);
1549  }
1550  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1551  unsigned ChainSizeInBytes,
1552  VectorType *VecTy) const override {
1553  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1554  }
1555  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1556  unsigned ChainSizeInBytes,
1557  VectorType *VecTy) const override {
1558  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1559  }
1560  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1561  ReductionFlags Flags) const override {
1562  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1563  }
1564  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1565  return Impl.shouldExpandReduction(II);
1566  }
1567  int getInstructionLatency(const Instruction *I) override {
1568  return Impl.getInstructionLatency(I);
1569  }
1570 };
1571 
1572 template <typename T>
1573 TargetTransformInfo::TargetTransformInfo(T Impl)
1574  : TTIImpl(new Model<T>(Impl)) {}
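// Editor's note (illustrative sketch, not part of the original header): this
// out-of-line constructor is the type-erasure step of the Concept/Model pair
// above. Any value whose type provides the Concept API can be passed in and is
// stored behind the Concept interface. MyTargetTTIImpl below is hypothetical:
//
//   MyTargetTTIImpl Impl(TM, F);              // target-specific cost model
//   TargetTransformInfo TTI(std::move(Impl)); // erased behind Model<MyTargetTTIImpl>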
1575 
1576 /// \brief Analysis pass providing the \c TargetTransformInfo.
1577 ///
1578 /// The core idea of the TargetIRAnalysis is to expose an interface through
1579 /// which LLVM targets can analyze and provide information about the middle
1580 /// end's target-independent IR. This supports use cases such as target-aware
1581 /// cost modeling of IR constructs.
1582 ///
1583 /// This is a function analysis because much of the cost modeling for targets
1584 /// is done in a subtarget specific way and LLVM supports compiling different
1585 /// functions targeting different subtargets in order to support runtime
1586 /// dispatch according to the observed subtarget.
1587 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1588 public:
1589  typedef TargetTransformInfo Result;
1590 
1591  /// \brief Default construct a target IR analysis.
1592  ///
1593  /// This will use the module's datalayout to construct a baseline
1594  /// conservative TTI result.
1595  TargetIRAnalysis();
1596 
1597  /// \brief Construct an IR analysis pass around a target-provide callback.
1598  ///
1599  /// The callback will be called with a particular function for which the TTI
1600  /// is needed and must return a TTI object for that function.
1601  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1602 
1603  // Value semantics. We spell out the constructors for MSVC.
1604  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1605  : TTICallback(Arg.TTICallback) {}
1606  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1607  : TTICallback(std::move(Arg.TTICallback)) {}
1608  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1609  TTICallback = RHS.TTICallback;
1610  return *this;
1611  }
1612  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1613  TTICallback = std::move(RHS.TTICallback);
1614  return *this;
1615  }
1616 
1617  Result run(const Function &F, FunctionAnalysisManager &);
1618 
1619 private:
1620  friend AnalysisInfoMixin<TargetIRAnalysis>;
1621  static AnalysisKey Key;
1622 
1623  /// \brief The callback used to produce a result.
1624  ///
1625  /// We use a completely opaque callback so that targets can provide whatever
1626  /// mechanism they desire for constructing the TTI for a given function.
1627  ///
1628  /// FIXME: Should we really use std::function? It's relatively inefficient.
1629  /// It might be possible to arrange for even stateful callbacks to outlive
1630  /// the analysis and thus use a function_ref which would be lighter weight.
1631  /// This may also be less error prone as the callback is likely to reference
1632  /// the external TargetMachine, and that reference needs to never dangle.
1633  std::function<Result(const Function &)> TTICallback;
1634 
1635  /// \brief Helper function used as the callback in the default constructor.
1636  static Result getDefaultTTI(const Function &F);
1637 };
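// Editor's illustrative sketch (not part of the original header): registering a
// TargetIRAnalysis with a custom callback in the new pass manager and querying
// the per-function result. The callback body is a placeholder that builds the
// conservative, DataLayout-based TTI; a real target would instead return a TTI
// wrapping its subtarget-specific implementation.
//
//   static TargetTransformInfo &queryTTI(Function &F,
//                                        FunctionAnalysisManager &FAM) {
//     FAM.registerPass([] {
//       return TargetIRAnalysis([](const Function &Fn) {
//         // Placeholder: conservative TTI derived from the module DataLayout.
//         return TargetTransformInfo(Fn.getParent()->getDataLayout());
//       });
//     });
//     return FAM.getResult<TargetIRAnalysis>(F);
//   }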
1638 
1639 /// \brief Wrapper pass for TargetTransformInfo.
1640 ///
1641 /// This pass can be constructed from a TTI object which it stores internally
1642 /// and is queried by passes.
1643 class TargetTransformInfoWrapperPass : public ImmutablePass {
1644  TargetIRAnalysis TIRA;
1645  Optional<TargetTransformInfo> TTI;
1646 
1647  virtual void anchor();
1648 
1649 public:
1650  static char ID;
1651 
1652  /// \brief We must provide a default constructor for the pass but it should
1653  /// never be used.
1654  ///
1655  /// Use the constructor below or call one of the creation routines.
1656  TargetTransformInfoWrapperPass();
1657 
1658  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1659 
1660  TargetTransformInfo &getTTI(const Function &F);
1661 };
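// Editor's illustrative sketch (not part of the original header): how a legacy
// PassManager pass typically obtains the TTI for the function it is processing.
// MyPass is a hypothetical FunctionPass.
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//
//   bool MyPass::runOnFunction(Function &F) {
//     TargetTransformInfo &TTI =
//         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//     unsigned VecRegBits = TTI.getRegisterBitWidth(/*Vector=*/true);
//     (void)VecRegBits; // use the query result to drive the transformation
//     return false;     // no IR was modified in this sketch
//   }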
1662 
1663 /// \brief Create an analysis pass wrapper around a TTI object.
1664 ///
1665 /// This analysis pass just holds the TTI instance and makes it available to
1666 /// clients.
1667 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1668 
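// Editor's illustrative sketch (not part of the original header): wiring the
// wrapper pass into a legacy pass pipeline. TM is assumed to be an already
// configured TargetMachine, which exposes its TTI through getTargetIRAnalysis().
//
//   legacy::PassManager PM;
//   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));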
1669 } // End llvm namespace
1670 
1671 #endif