LCOV - code coverage report
Current view: top level - include/llvm/Analysis - TargetTransformInfo.h
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21
Lines: 467 of 735 hit (63.5 %)
Functions: 657 of 1562 hit (42.1 %)

          Line data    Source code
       1             : //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : /// \file
      10             : /// This pass exposes codegen information to IR-level passes. Every
      11             : /// transformation that uses codegen information is broken into three parts:
      12             : /// 1. The IR-level analysis pass.
      13             : /// 2. The IR-level transformation interface which provides the needed
      14             : ///    information.
      15             : /// 3. Codegen-level implementation which uses target-specific hooks.
      16             : ///
      17             : /// This file defines #2, which is the interface that IR-level transformations
      18             : /// use for querying the codegen.
      19             : ///
      20             : //===----------------------------------------------------------------------===//
      21             : 
      22             : #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
      23             : #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
      24             : 
      25             : #include "llvm/ADT/Optional.h"
      26             : #include "llvm/IR/Operator.h"
      27             : #include "llvm/IR/PassManager.h"
      28             : #include "llvm/Pass.h"
      29             : #include "llvm/Support/AtomicOrdering.h"
      30             : #include "llvm/Support/DataTypes.h"
      31             : #include <functional>
      32             : 
      33             : namespace llvm {
      34             : 
      35             : namespace Intrinsic {
      36             : enum ID : unsigned;
      37             : }
      38             : 
      39             : class Function;
      40             : class GlobalValue;
      41             : class IntrinsicInst;
      42             : class LoadInst;
      43             : class Loop;
      44             : class SCEV;
      45             : class ScalarEvolution;
      46             : class StoreInst;
      47             : class SwitchInst;
      48             : class Type;
      49             : class User;
      50             : class Value;
      51             : 
      52             : /// Information about a load/store intrinsic defined by the target.
      53     3333800 : struct MemIntrinsicInfo {
      54             :   /// This is the pointer that the intrinsic is loading from or storing to.
      55             :   /// If this is non-null, then analysis/optimization passes can assume that
      56             :   /// this intrinsic is functionally equivalent to a load/store from this
      57             :   /// pointer.
      58             :   Value *PtrVal = nullptr;
      59             : 
      60             :   // Ordering for atomic operations.
      61             :   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
      62             : 
      63             :   // Same Id is set by the target for corresponding load/store intrinsics.
      64             :   unsigned short MatchingId = 0;
      65             : 
      66             :   bool ReadMem = false;
      67             :   bool WriteMem = false;
      68             :   bool IsVolatile = false;
      69             : 
      70           0 :   bool isUnordered() const {
      71         806 :     return (Ordering == AtomicOrdering::NotAtomic ||
      72         806 :             Ordering == AtomicOrdering::Unordered) && !IsVolatile;
      73             :   }
      74             : };
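
  As an illustration, a pass such as EarlyCSE might consume this struct
  roughly as sketched below. TTI, the IntrinsicInst *II, and the
  getTgtMemIntrinsic hook that fills the struct in (declared further down in
  this header) are assumed to be in scope; tryToCSELoadLikeIntrinsic is a
  hypothetical helper.

    MemIntrinsicInfo Info;
    if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered()) {
      // Behaves like an ordinary access to Info.PtrVal; intrinsics with the
      // same MatchingId are candidates for load/store forwarding.
      if (Info.ReadMem && !Info.WriteMem)
        tryToCSELoadLikeIntrinsic(II, Info.PtrVal, Info.MatchingId);
    }
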
      75             : 
      76             : /// This pass provides access to the codegen interfaces that are needed
      77             : /// for IR-level transformations.
      78             : class TargetTransformInfo {
      79             : public:
      80             :   /// Construct a TTI object using a type implementing the \c Concept
      81             :   /// API below.
      82             :   ///
      83             :   /// This is used by targets to construct a TTI wrapping their target-specific
      84             :   /// implementation that encodes appropriate costs for their target.
      85             :   template <typename T> TargetTransformInfo(T Impl);
      86             : 
      87             :   /// Construct a baseline TTI object using a minimal implementation of
      88             :   /// the \c Concept API below.
      89             :   ///
      90             :   /// The TTI implementation will reflect the information in the DataLayout
      91             :   /// provided if non-null.
      92             :   explicit TargetTransformInfo(const DataLayout &DL);
      93             : 
      94             :   // Provide move semantics.
      95             :   TargetTransformInfo(TargetTransformInfo &&Arg);
      96             :   TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
      97             : 
      98             :   // We need to define the destructor out-of-line so that our sub-classes
      99             :   // can be defined out-of-line.
     100             :   ~TargetTransformInfo();
     101             : 
     102             :   /// Handle the invalidation of this information.
     103             :   ///
     104             :   /// When used as a result of \c TargetIRAnalysis this method will be called
     105             :   /// when the function this was computed for changes. When it returns false,
     106             :   /// the information is preserved across those changes.
     107           0 :   bool invalidate(Function &, const PreservedAnalyses &,
     108             :                   FunctionAnalysisManager::Invalidator &) {
     109             :     // FIXME: We should probably in some way ensure that the subtarget
     110             :     // information for a function hasn't changed.
     111           0 :     return false;
     112             :   }
     113             : 
     114             :   /// \name Generic Target Information
     115             :   /// @{
     116             : 
     117             :   /// The kind of cost model.
     118             :   ///
     119             :   /// There are several different cost models that can be customized by the
     120             :   /// target. The normalization of each cost model may be target specific.
     121             :   enum TargetCostKind {
     122             :     TCK_RecipThroughput, ///< Reciprocal throughput.
     123             :     TCK_Latency,         ///< The latency of the instruction.
     124             :     TCK_CodeSize         ///< Instruction code size.
     125             :   };
     126             : 
     127             :   /// Query the cost of a specified instruction.
     128             :   ///
     129             :   /// Clients should use this interface to query the cost of an existing
     130             :   /// instruction. The instruction must have a valid parent (basic block).
     131             :   ///
     132             :   /// Note, this method does not cache the cost calculation and it
     133             :   /// can be expensive in some cases.
     134       27770 :   int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
     135       27770 :     switch (kind){
     136       27748 :     case TCK_RecipThroughput:
     137       30648 :       return getInstructionThroughput(I);
     138             : 
     139          11 :     case TCK_Latency:
     140          11 :       return getInstructionLatency(I);
     141             : 
     142          11 :     case TCK_CodeSize:
     143         234 :       return getUserCost(I);
     144             :     }
     145           0 :     llvm_unreachable("Unknown instruction cost kind");
     146             :   }
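
  A minimal client-side sketch, assuming TTI is a TargetTransformInfo
  reference and BB points at a basic block (neither is part of this header):

    int BlockCost = 0;
    for (const Instruction &I : *BB)
      BlockCost +=
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
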
     147             : 
     148             :   /// Underlying constants for 'cost' values in this interface.
     149             :   ///
     150             :   /// Many APIs in this interface return a cost. This enum defines the
     151             :   /// fundamental values that should be used to interpret (and produce) those
     152             :   /// costs. The costs are returned as an int rather than a member of this
     153             :   /// enumeration because it is expected that the cost of one IR instruction
     154             :   /// may have a multiplicative factor to it or otherwise won't fit directly
     155             :   /// into the enum. Moreover, it is common to sum or average costs which works
     156             :   /// better as simple integral values. Thus this enum only provides constants.
     157             :   /// Also note that the returned costs are signed integers to make it natural
     158             :   /// to add, subtract, and test with zero (a common boundary condition). It is
     159             :   /// not expected that 2^32 is a realistic cost to be modeling at any point.
     160             :   ///
     161             :   /// Note that these costs should usually reflect the intersection of code-size
     162             :   /// cost and execution cost. A free instruction is typically one that folds
     163             :   /// into another instruction. For example, reg-to-reg moves can often be
     164             :   /// skipped by renaming the registers in the CPU, but they still are encoded
     165             :   /// and thus wouldn't be considered 'free' here.
     166             :   enum TargetCostConstants {
     167             :     TCC_Free = 0,     ///< Expected to fold away in lowering.
     168             :     TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
     169             :     TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
     170             :   };
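
  A sketch of how these constants are meant to be consumed, in the style of a
  speculation legality check (TTI and the instruction I are assumed; the
  threshold choice is illustrative, not taken from any particular pass):

    // Only hoist the instruction if it is no more expensive than a typical
    // 'add'; TCC_Expensive operations stay where they are.
    bool CheapEnough = TTI.getUserCost(&I) <= TargetTransformInfo::TCC_Basic;
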
     171             : 
     172             :   /// Estimate the cost of a specific operation when lowered.
     173             :   ///
     174             :   /// Note that this is designed to work on an arbitrary synthetic opcode, and
     175             :   /// thus work for hypothetical queries before an instruction has even been
     176             :   /// formed. However, this does *not* work for GEPs, and must not be called
     177             :   /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
     178             :   /// analyzing a GEP's cost requires more information.
     179             :   ///
     180             :   /// Typically only the result type is required, and the operand type can be
     181             :   /// omitted. However, if the opcode is one of the cast instructions, the
     182             :   /// operand type is required.
     183             :   ///
     184             :   /// The returned cost is defined in terms of \c TargetCostConstants, see its
     185             :   /// comments for a detailed explanation of the cost values.
     186             :   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
     187             : 
     188             :   /// Estimate the cost of a GEP operation when lowered.
     189             :   ///
     190             :   /// The contract for this function is the same as \c getOperationCost except
     191             :   /// that it supports an interface that provides extra information specific to
     192             :   /// the GEP operation.
     193             :   int getGEPCost(Type *PointeeType, const Value *Ptr,
     194             :                  ArrayRef<const Value *> Operands) const;
     195             : 
     196             :   /// Estimate the cost of an EXT operation when lowered.
     197             :   ///
     198             :   /// The contract for this function is the same as \c getOperationCost except
     199             :   /// that it supports an interface that provides extra information specific to
     200             :   /// the EXT operation.
     201             :   int getExtCost(const Instruction *I, const Value *Src) const;
     202             : 
     203             :   /// Estimate the cost of a function call when lowered.
     204             :   ///
     205             :   /// The contract for this is the same as \c getOperationCost except that it
     206             :   /// supports an interface that provides extra information specific to call
     207             :   /// instructions.
     208             :   ///
     209             :   /// This is the most basic query for estimating call cost: it only knows the
     210             :   /// function type and (potentially) the number of arguments at the call site.
     211             :   /// The latter is only interesting for varargs function types.
     212             :   int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
     213             : 
     214             :   /// Estimate the cost of calling a specific function when lowered.
     215             :   ///
     216             :   /// This overload adds the ability to reason about the particular function
     217             :   /// being called in the event it is a library call with special lowering.
     218             :   int getCallCost(const Function *F, int NumArgs = -1) const;
     219             : 
     220             :   /// Estimate the cost of calling a specific function when lowered.
     221             :   ///
     222             :   /// This overload allows specifying a set of candidate argument values.
     223             :   int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
     224             : 
     225             :   /// \returns A value by which our inlining threshold should be multiplied.
     226             :   /// This is primarily used to bump up the inlining threshold wholesale on
     227             :   /// targets where calls are unusually expensive.
     228             :   ///
     229             :   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
     230             :   /// individual classes of instructions would be better.
     231             :   unsigned getInliningThresholdMultiplier() const;
     232             : 
     233             :   /// Estimate the cost of an intrinsic when lowered.
     234             :   ///
     235             :   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
     236             :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     237             :                        ArrayRef<Type *> ParamTys) const;
     238             : 
     239             :   /// Estimate the cost of an intrinsic when lowered.
     240             :   ///
     241             :   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
     242             :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     243             :                        ArrayRef<const Value *> Arguments) const;
     244             : 
     245             :   /// \return The estimated number of case clusters when lowering \p 'SI'.
     246             :   /// \p JTSize is set to the jump table size only when \p SI is suitable for
     247             :   /// a jump table.
     248             :   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
     249             :                                             unsigned &JTSize) const;
     250             : 
     251             :   /// Estimate the cost of a given IR user when lowered.
     252             :   ///
     253             :   /// This can estimate the cost of either a ConstantExpr or Instruction when
     254             :   /// lowered. It has two primary advantages over the \c getOperationCost and
     255             :   /// \c getGEPCost above, and one significant disadvantage: it can only be
     256             :   /// used when the IR construct has already been formed.
     257             :   ///
     258             :   /// The advantages are that it can inspect the SSA use graph to reason more
     259             :   /// accurately about the cost. For example, all-constant-GEPs can often be
     260             :   /// folded into a load or other instruction, but if they are used in some
     261             :   /// other context they may not be folded. This routine can distinguish such
     262             :   /// cases.
     263             :   ///
     264             :   /// \p Operands is a list of operands which can be a result of
     265             :   /// transformations of the current operands. The number of operands in
     266             :   /// the list must be equal to the number of operands the IR user
     267             :   /// currently has, and their order in the list must be the same as the
     268             :   /// order of the user's current operands.
     269             :   ///
     270             :   /// The returned cost is defined in terms of \c TargetCostConstants, see its
     271             :   /// comments for a detailed explanation of the cost values.
     272             :   int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
     273             : 
     274             :   /// This is a helper function which calls the two-argument getUserCost
     275             :   /// with \p Operands which are the current operands U has.
     276     4009483 :   int getUserCost(const User *U) const {
     277             :     SmallVector<const Value *, 4> Operands(U->value_op_begin(),
     278     4009483 :                                            U->value_op_end());
     279     4009483 :     return getUserCost(U, Operands);
     280             :   }
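
  A sketch of the two-argument form, which lets a pass price a user as if its
  operands had already been rewritten. U is a const User * and NewV an
  illustrative const Value * replacement for operand 0 (both assumptions):

    SmallVector<const Value *, 4> Ops(U->value_op_begin(), U->value_op_end());
    Ops[0] = NewV; // cost as if the first operand were already replaced
    int CostAfter = TTI.getUserCost(U, Ops);
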
     281             : 
     282             :   /// Return true if branch divergence exists.
     283             :   ///
     284             :   /// Branch divergence has a significantly negative impact on GPU performance
     285             :   /// when threads in the same wavefront take different paths due to conditional
     286             :   /// branches.
     287             :   bool hasBranchDivergence() const;
     288             : 
     289             :   /// Returns whether V is a source of divergence.
     290             :   ///
     291             :   /// This function provides the target-dependent information for
     292             :   /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
     293             :   /// builds the dependency graph, and then runs the reachability algorithm
     294             :   /// starting with the sources of divergence.
     295             :   bool isSourceOfDivergence(const Value *V) const;
     296             : 
     297             :   // Returns true for the target-specific
     298             :   // set of operations which produce a uniform result
     299             :   // even when taking non-uniform arguments.
     300             :   bool isAlwaysUniform(const Value *V) const;
     301             : 
     302             :   /// Returns the address space ID for a target's 'flat' address space. Note
     303             :   /// this is not necessarily the same as addrspace(0), which LLVM sometimes
     304             :   /// refers to as the generic address space. The flat address space is a
     305             :   /// generic address space that can be used to access multiple segments of memory
     306             :   /// with different address spaces. Access of a memory location through a
     307             :   /// pointer with this address space is expected to be legal but slower
     308             :   /// compared to the same memory location accessed through a pointer with a
     309             :   /// different address space.
     310             :   ///
     311             :   /// This is for targets with different pointer representations which can
     312             :   /// be converted with the addrspacecast instruction. If a pointer is converted
     313             :   /// to this address space, optimizations should attempt to replace the access
     314             :   /// with the source address space.
     315             :   ///
     316             :   /// \returns ~0u if the target does not have such a flat address space to
     317             :   /// optimize away.
     318             :   unsigned getFlatAddressSpace() const;
     319             : 
     320             :   /// Test whether calls to a function lower to actual program function
     321             :   /// calls.
     322             :   ///
     323             :   /// The idea is to test whether the program is likely to require a 'call'
     324             :   /// instruction or equivalent in order to call the given function.
     325             :   ///
     326             :   /// FIXME: It's not clear that this is a good or useful query API. Clients
     327             :   /// should probably move to simpler cost metrics using the above.
     328             :   /// Alternatively, we could split the cost interface into distinct code-size
     329             :   /// and execution-speed costs. This would allow modelling the core of this
     330             :   /// query more accurately as a call is a single small instruction, but
     331             :   /// incurs significant execution cost.
     332             :   bool isLoweredToCall(const Function *F) const;
     333             : 
     334             :   struct LSRCost {
     335             :     /// TODO: Some of these could be merged. Also, a lexical ordering
     336             :     /// isn't always optimal.
     337             :     unsigned Insns;
     338             :     unsigned NumRegs;
     339             :     unsigned AddRecCost;
     340             :     unsigned NumIVMuls;
     341             :     unsigned NumBaseAdds;
     342             :     unsigned ImmCost;
     343             :     unsigned SetupCost;
     344             :     unsigned ScaleCost;
     345             :   };
     346             : 
     347             :   /// Parameters that control the generic loop unrolling transformation.
     348             :   struct UnrollingPreferences {
     349             :     /// The cost threshold for the unrolled loop. Should be relative to the
     350             :     /// getUserCost values returned by this API, and the expectation is that
     351             :     /// the unrolled loop's instructions when run through that interface should
     352             :     /// not exceed this cost. However, this is only an estimate. Also, specific
     353             :     /// loops may be unrolled even with a cost above this threshold if deemed
     354             :     /// profitable. Set this to UINT_MAX to disable the loop body cost
     355             :     /// restriction.
     356             :     unsigned Threshold;
     357             :     /// If complete unrolling will reduce the cost of the loop, we will boost
     358             :     /// the Threshold by a certain percent to allow more aggressive complete
     359             :     /// unrolling. This value provides the maximum boost percentage that we
     360             :     /// can apply to Threshold (The value should be no less than 100).
     361             :     /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
     362             :     ///                                    MaxPercentThresholdBoost / 100)
     363             :     /// E.g. if complete unrolling reduces the loop execution time by 50%
     364             :     /// then we boost the threshold by the factor of 2x. If unrolling is not
     365             :     /// expected to reduce the running time, then we do not increase the
     366             :     /// threshold.
     367             :     unsigned MaxPercentThresholdBoost;
     368             :     /// The cost threshold for the unrolled loop when optimizing for size (set
     369             :     /// to UINT_MAX to disable).
     370             :     unsigned OptSizeThreshold;
     371             :     /// The cost threshold for the unrolled loop, like Threshold, but used
     372             :     /// for partial/runtime unrolling (set to UINT_MAX to disable).
     373             :     unsigned PartialThreshold;
     374             :     /// The cost threshold for the unrolled loop when optimizing for size, like
     375             :     /// OptSizeThreshold, but used for partial/runtime unrolling (set to
     376             :     /// UINT_MAX to disable).
     377             :     unsigned PartialOptSizeThreshold;
     378             :     /// A forced unrolling factor (the number of concatenated bodies of the
     379             :     /// original loop in the unrolled loop body). When set to 0, the unrolling
     380             :     /// transformation will select an unrolling factor based on the current cost
     381             :     /// threshold and other factors.
     382             :     unsigned Count;
     383             :     /// A forced peeling factor (the number of bodies of the original loop
     384             :     /// that should be peeled off before the loop body). When set to 0, the
     385             :     /// unrolling transformation will select a peeling factor based on profile
     386             :     /// information and other factors.
     387             :     unsigned PeelCount;
     388             :     /// Default unroll count for loops with run-time trip count.
     389             :     unsigned DefaultUnrollRuntimeCount;
     390             :     /// Set the maximum unrolling factor. The unrolling factor may be selected
     391             :     /// using the appropriate cost threshold, but may not exceed this number
     392             :     /// (set to UINT_MAX to disable). This does not apply in cases where the
     393             :     /// loop is being fully unrolled.
     394             :     unsigned MaxCount;
     395             :     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
     396             :     /// applies even if full unrolling is selected. This allows a target to fall
     397             :     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
     398             :     unsigned FullUnrollMaxCount;
     399             :     /// Represents the number of instructions optimized when the "back edge"
     400             :     /// becomes a "fall through" in the unrolled loop.
     401             :     /// For now we count a conditional branch on a backedge and a comparison
     402             :     /// feeding it.
     403             :     unsigned BEInsns;
     404             :     /// Allow partial unrolling (unrolling of loops to expand the size of the
     405             :     /// loop body, not only to eliminate small constant-trip-count loops).
     406             :     bool Partial;
     407             :     /// Allow runtime unrolling (unrolling of loops to expand the size of the
     408             :     /// loop body even when the number of loop iterations is not known at
     409             :     /// compile time).
     410             :     bool Runtime;
     411             :     /// Allow generation of a loop remainder (extra iterations after unroll).
     412             :     bool AllowRemainder;
     413             :     /// Allow emitting expensive instructions (such as divisions) when computing
     414             :     /// the trip count of a loop for runtime unrolling.
     415             :     bool AllowExpensiveTripCount;
     416             :     /// Apply loop unroll on any kind of loop
     417             :     /// (mainly to loops that fail runtime unrolling).
     418             :     bool Force;
     419             :     /// Allow using trip count upper bound to unroll loops.
     420             :     bool UpperBound;
     421             :     /// Allow peeling off loop iterations for loops with low dynamic tripcount.
     422             :     bool AllowPeeling;
     423             :     /// Allow unrolling of all the iterations of the runtime loop remainder.
     424             :     bool UnrollRemainder;
     425             :     /// Allow unroll and jam. Used to enable unroll and jam for the target.
     426             :     bool UnrollAndJam;
     427             :     /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
     428             :     /// value above is used during unroll and jam for the outer loop size.
     429             :     /// This value is used in the same manner to limit the size of the inner
     430             :     /// loop.
     431             :     unsigned UnrollAndJamInnerLoopThreshold;
     432             :   };
     433             : 
     434             :   /// Get target-customized preferences for the generic loop unrolling
     435             :   /// transformation. The caller will initialize UP with the current
     436             :   /// target-independent defaults.
     437             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &,
     438             :                                UnrollingPreferences &UP) const;
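
  On the implementation side, a target typically adjusts only the fields it
  cares about and leaves the caller-provided defaults alone. A hypothetical
  sketch (MyTargetTTIImpl and all concrete values are made up):

    void MyTargetTTIImpl::getUnrollingPreferences(
        Loop *L, ScalarEvolution &SE,
        TargetTransformInfo::UnrollingPreferences &UP) {
      UP.Partial = true;         // allow partial unrolling on this target
      UP.Runtime = true;         // and runtime unrolling
      UP.MaxCount = 4;           // but never unroll by more than 4
      UP.PartialThreshold = 200; // tolerate somewhat larger partial bodies
    }
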
     439             : 
     440             :   /// @}
     441             : 
     442             :   /// \name Scalar Target Information
     443             :   /// @{
     444             : 
     445             :   /// Flags indicating the kind of support for population count.
     446             :   ///
     447             :   /// Compared to the SW implementation, HW support is supposed to
     448             :   /// significantly boost the performance when the population is dense, and it
     449             :   /// may or may not degrade performance if the population is sparse. HW
     450             :   /// support is considered "Fast" if it can outperform, or is on a par
     451             :   /// with, the SW implementation when the population is sparse; otherwise,
     452             :   /// it is considered "Slow".
     453             :   enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
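
  A client-side sketch, e.g. deciding whether to turn a bit-counting loop
  into a ctpop intrinsic (TTI is assumed as before):

    if (TTI.getPopcntSupport(64) == TargetTransformInfo::PSK_FastHardware) {
      // Emitting llvm.ctpop.i64 is expected to beat the software loop.
    }
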
     454             : 
     455             :   /// Return true if the specified immediate is a legal add immediate, that
     456             :   /// is, the target has add instructions which can add a register with the
     457             :   /// immediate without having to materialize the immediate into a register.
     458             :   bool isLegalAddImmediate(int64_t Imm) const;
     459             : 
     460             :   /// Return true if the specified immediate is a legal icmp immediate,
     461             :   /// that is, the target has icmp instructions which can compare a register
     462             :   /// against the immediate without having to materialize the immediate into a
     463             :   /// register.
     464             :   bool isLegalICmpImmediate(int64_t Imm) const;
     465             : 
     466             :   /// Return true if the addressing mode represented by AM is legal for
     467             :   /// this target, for a load/store of the specified type.
     468             :   /// The type may be VoidTy, in which case only return true if the addressing
     469             :   /// mode is legal for a load/store of any legal type.
     470             :   /// If the target returns true in LSRWithInstrQueries(), I may be valid.
     471             :   /// TODO: Handle pre/postinc as well.
     472             :   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
     473             :                              bool HasBaseReg, int64_t Scale,
     474             :                              unsigned AddrSpace = 0,
     475             :                              Instruction *I = nullptr) const;
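
  For example, LSR-style code could check whether a [reg + 16] mode is legal
  for an i32 access. A minimal sketch, with Int32Ty assumed to be the i32
  Type *:

    bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
                                           /*BaseOffset=*/16,
                                           /*HasBaseReg=*/true, /*Scale=*/0);
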
     476             : 
     477             :   /// Return true if the LSR cost of C1 is lower than that of C2.
     478             :   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
     479             :                      TargetTransformInfo::LSRCost &C2) const;
     480             : 
     481             :   /// Return true if the target can fuse a compare and branch.
     482             :   /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
     483             :   /// calculation for the instructions in a loop.
     484             :   bool canMacroFuseCmp() const;
     485             : 
     486             :   /// \return True if LSR should make efforts to create/preserve post-inc
     487             :   /// addressing mode expressions.
     488             :   bool shouldFavorPostInc() const;
     489             : 
     490             :   /// Return true if the target supports masked load/store.
     491             :   /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
     492             :   bool isLegalMaskedStore(Type *DataType) const;
     493             :   bool isLegalMaskedLoad(Type *DataType) const;
     494             : 
     495             :   /// Return true if the target supports masked gather/scatter.
     496             :   /// AVX-512 fully supports gather and scatter for vectors with 32- and
     497             :   /// 64-bit scalar types.
     498             :   bool isLegalMaskedScatter(Type *DataType) const;
     499             :   bool isLegalMaskedGather(Type *DataType) const;
     500             : 
     501             :   /// Return true if the target has a unified operation to calculate division
     502             :   /// and remainder. If so, the additional implicit multiplication and
     503             :   /// subtraction required to calculate a remainder from division are free. This
     504             :   /// can enable more aggressive transformations for division and remainder than
     505             :   /// would typically be allowed using throughput or size cost models.
     506             :   bool hasDivRemOp(Type *DataType, bool IsSigned) const;
     507             : 
     508             :   /// Return true if the given instruction (assumed to be a memory access
     509             :   /// instruction) has a volatile variant. If that's the case then we can avoid
     510             :   /// addrspacecast to generic AS for volatile loads/stores. Default
     511             :   /// implementation returns false, which prevents address space inference for
     512             :   /// volatile loads/stores.
     513             :   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
     514             : 
     515             :   /// Return true if target doesn't mind addresses in vectors.
     516             :   bool prefersVectorizedAddressing() const;
     517             : 
     518             :   /// Return the cost of the scaling factor used in the addressing
     519             :   /// mode represented by AM for this target, for a load/store
     520             :   /// of the specified type.
     521             :   /// If the AM is supported, the return value must be >= 0.
     522             :   /// If the AM is not supported, it returns a negative value.
     523             :   /// TODO: Handle pre/postinc as well.
     524             :   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
     525             :                            bool HasBaseReg, int64_t Scale,
     526             :                            unsigned AddrSpace = 0) const;
     527             : 
     528             :   /// Return true if the loop strength reduce pass should make
     529             :   /// Instruction* based TTI queries to isLegalAddressingMode(). This is
     530             :   /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
     531             :   /// immediate offset and no index register.
     532             :   bool LSRWithInstrQueries() const;
     533             : 
     534             :   /// Return true if it's free to truncate a value of type Ty1 to type
     535             :   /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
     536             :   /// by referencing its sub-register AX.
     537             :   bool isTruncateFree(Type *Ty1, Type *Ty2) const;
     538             : 
     539             :   /// Return true if it is profitable to hoist instructions in the
     540             :   /// then/else blocks to before the if.
     541             :   bool isProfitableToHoist(Instruction *I) const;
     542             : 
     543             :   bool useAA() const;
     544             : 
     545             :   /// Return true if this type is legal.
     546             :   bool isTypeLegal(Type *Ty) const;
     547             : 
     548             :   /// Returns the target's jmp_buf alignment in bytes.
     549             :   unsigned getJumpBufAlignment() const;
     550             : 
     551             :   /// Returns the target's jmp_buf size in bytes.
     552             :   unsigned getJumpBufSize() const;
     553             : 
     554             :   /// Return true if switches should be turned into lookup tables for the
     555             :   /// target.
     556             :   bool shouldBuildLookupTables() const;
     557             : 
     558             :   /// Return true if switches should be turned into lookup tables
     559             :   /// containing this constant value for the target.
     560             :   bool shouldBuildLookupTablesForConstant(Constant *C) const;
     561             : 
     562             :   /// Return true if the input function, which is cold at all call sites,
     563             :   /// should use the coldcc calling convention.
     564             :   bool useColdCCForColdCall(Function &F) const;
     565             : 
     566             :   unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
     567             : 
     568             :   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
     569             :                                             unsigned VF) const;
     570             : 
     571             :   /// If the target has efficient vector element load/store instructions, it can
     572             :   /// return true here so that insertion/extraction costs are not added to
     573             :   /// the scalarization cost of a load/store.
     574             :   bool supportsEfficientVectorElementLoadStore() const;
     575             : 
     576             :   /// Don't restrict interleaved unrolling to small loops.
     577             :   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
     578             : 
     579             :   /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
     580             :   /// true if this is the expansion of memcmp(p1, p2, s) == 0.
     581             :   struct MemCmpExpansionOptions {
     582             :     // The list of available load sizes (in bytes), sorted in decreasing order.
     583             :     SmallVector<unsigned, 8> LoadSizes;
     584             :   };
     585             :   const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
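
  A hypothetical target-side implementation, assuming the target can load 8,
  4, 2, and 1 bytes per comparison step (MyTargetTTIImpl is made up):

    const TargetTransformInfo::MemCmpExpansionOptions *
    MyTargetTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
      static const TargetTransformInfo::MemCmpExpansionOptions Options = [] {
        TargetTransformInfo::MemCmpExpansionOptions O;
        // Sorted in decreasing order, as the field's comment requires.
        for (unsigned Size : {8u, 4u, 2u, 1u})
          O.LoadSizes.push_back(Size);
        return O;
      }();
      return &Options;
    }
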
     586             : 
     587             :   /// Enable matching of interleaved access groups.
     588             :   bool enableInterleavedAccessVectorization() const;
     589             : 
     590             :   /// Enable matching of interleaved access groups that contain predicated 
     591             :   /// accesses and are vectorized using masked vector loads/stores.
     592             :   bool enableMaskedInterleavedAccessVectorization() const;
     593             : 
     594             :   /// Indicate that it is potentially unsafe to automatically vectorize
     595             :   /// floating-point operations because the semantics of vector and scalar
     596             :   /// floating-point arithmetic may differ. For example, ARM NEON v7 SIMD math
     597             :   /// does not support IEEE-754 denormal numbers, while depending on the
     598             :   /// platform, scalar floating-point math does.
     599             :   /// This applies to floating-point math operations and calls, not memory
     600             :   /// operations, shuffles, or casts.
     601             :   bool isFPVectorizationPotentiallyUnsafe() const;
     602             : 
     603             :   /// Determine if the target supports unaligned memory accesses.
     604             :   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
     605             :                                       unsigned BitWidth, unsigned AddressSpace = 0,
     606             :                                       unsigned Alignment = 1,
     607             :                                       bool *Fast = nullptr) const;
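
  A client-side sketch for a 128-bit access, with Ctx assumed to be the
  LLVMContext in use:

    bool Fast = false;
    if (TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/128,
                                           /*AddressSpace=*/0, /*Alignment=*/1,
                                           &Fast) &&
        Fast) {
      // An unaligned 16-byte access is both legal and cheap here.
    }
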
     608             : 
     609             :   /// Return hardware support for population count.
     610             :   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
     611             : 
     612             :   /// Return true if the hardware has a fast square-root instruction.
     613             :   bool haveFastSqrt(Type *Ty) const;
     614             : 
     615             :   /// Return true if it is faster to check if a floating-point value is NaN
     616             :   /// (or not-NaN) versus a comparison against a constant FP zero value.
     617             :   /// Targets should override this if materializing a 0.0 for comparison is
     618             :   /// generally as cheap as checking for ordered/unordered.
     619             :   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
     620             : 
     621             :   /// Return the expected cost of supporting the floating point operation
     622             :   /// of the specified type.
     623             :   int getFPOpCost(Type *Ty) const;
     624             : 
     625             :   /// Return the expected cost of materializing for the given integer
     626             :   /// immediate of the specified type.
     627             :   int getIntImmCost(const APInt &Imm, Type *Ty) const;
     628             : 
     629             :   /// Return the expected cost of materialization for the given integer
     630             :   /// immediate of the specified type for a given instruction. The cost can be
     631             :   /// zero if the immediate can be folded into the specified instruction.
     632             :   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
     633             :                     Type *Ty) const;
     634             :   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
     635             :                     Type *Ty) const;
     636             : 
     637             :   /// Return the expected cost for the given integer when optimising
     638             :   /// for size. This is different from the other integer immediate cost
     639             :   /// functions in that it is subtarget agnostic. This is useful when you e.g.
     640             :   /// target one ISA such as AArch32 but smaller encodings could be possible
     641             :   /// with another such as Thumb. This return value is used as a penalty when
     642             :   /// the total cost of a constant is calculated (the bigger the cost, the
     643             :   /// more beneficial constant hoisting is).
     644             :   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
     645             :                             Type *Ty) const;
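
  A sketch of the kind of decision constant-hoisting style code makes with
  these hooks; Imm and Ty are assumed to describe the immediate, and operand
  index 1 of an 'add' is just an example:

    // If the immediate cannot be folded into the add for free, materializing
    // it once and reusing it (hoisting) may pay off.
    int Cost = TTI.getIntImmCost(Instruction::Add, /*Idx=*/1, Imm, Ty);
    bool WorthHoisting = Cost > TargetTransformInfo::TCC_Basic;
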
     646             :   /// @}
     647             : 
     648             :   /// \name Vector Target Information
     649             :   /// @{
     650             : 
     651             :   /// The various kinds of shuffle patterns for vector queries.
     652             :   enum ShuffleKind {
     653             :     SK_Broadcast,       ///< Broadcast element 0 to all other elements.
     654             :     SK_Reverse,         ///< Reverse the order of the vector.
     655             :     SK_Select,          ///< Selects elements from the corresponding lane of
     656             :                         ///< either source operand. This is equivalent to a
     657             :                         ///< vector select with a constant condition operand.
     658             :     SK_Transpose,       ///< Transpose two vectors.
     659             :     SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
     660             :     SK_ExtractSubvector,///< ExtractSubvector. Index indicates start offset.
     661             :     SK_PermuteTwoSrc,   ///< Merge elements from two source vectors into one
     662             :                         ///< with any shuffle mask.
     663             :     SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
     664             :                         ///< shuffle mask.
     665             :   };
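
  For instance, the cost of reversing a <4 x i32> vector could be queried as
  below (a sketch; VecTy is assumed to be the corresponding VectorType *):

    int ReverseCost =
        TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VecTy);
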
     666             : 
     667             :   /// Additional information about an operand's possible values.
     668             :   enum OperandValueKind {
     669             :     OK_AnyValue,               // Operand can have any value.
     670             :     OK_UniformValue,           // Operand is uniform (splat of a value).
     671             :     OK_UniformConstantValue,   // Operand is uniform constant.
     672             :     OK_NonUniformConstantValue // Operand is a non uniform constant value.
     673             :   };
     674             : 
     675             :   /// Additional properties of an operand's values.
     676             :   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
     677             : 
     678             :   /// \return The number of scalar or vector registers that the target has.
     679             :   /// If 'Vector' is true, it returns the number of vector registers. If it is
     680             :   /// set to false, it returns the number of scalar registers.
     681             :   unsigned getNumberOfRegisters(bool Vector) const;
     682             : 
     683             :   /// \return The width of the largest scalar or vector register type.
     684             :   unsigned getRegisterBitWidth(bool Vector) const;
     685             : 
     686             :   /// \return The width of the smallest vector register type.
     687             :   unsigned getMinVectorRegisterBitWidth() const;
     688             : 
     689             :   /// \return True if the vectorization factor should be chosen to
     690             :   /// make the vector of the smallest element type match the size of a
     691             :   /// vector register. For wider element types, this could result in
     692             :   /// creating vectors that span multiple vector registers.
     693             :   /// If false, the vectorization factor will be chosen based on the
     694             :   /// size of the widest element type.
     695             :   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
     696             : 
     697             :   /// \return The minimum vectorization factor for types of given element
     698             :   /// bit width, or 0 if there is no minimum VF. The returned value only
     699             :   /// applies when shouldMaximizeVectorBandwidth returns true.
     700             :   unsigned getMinimumVF(unsigned ElemWidth) const;
     701             : 
     702             :   /// \return True if \p I should be considered for address type promotion.
     703             :   /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I is
     704             :   /// profitable without finding other extensions fed by the same input.
     705             :   bool shouldConsiderAddressTypePromotion(
     706             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
     707             : 
     708             :   /// \return The size of a cache line in bytes.
     709             :   unsigned getCacheLineSize() const;
     710             : 
     711             :   /// The possible cache levels
     712             :   enum class CacheLevel {
     713             :     L1D,   // The L1 data cache
     714             :     L2D,   // The L2 data cache
     715             : 
     716             :     // We currently do not model L3 caches, as their sizes differ widely between
     717             :     // microarchitectures. Also, we currently do not have a use for L3 cache
     718             :     // size modeling yet.
     719             :   };
     720             : 
     721             :   /// \return The size of the cache level in bytes, if available.
     722             :   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
     723             : 
     724             :   /// \return The associativity of the cache level, if available.
     725             :   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
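
  A sketch of how a loop transform might use these queries to bound a tile
  size; the fallback constant is arbitrary:

    unsigned CacheBytes = 32768; // conservative guess if the target is silent
    if (llvm::Optional<unsigned> Size =
            TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
      CacheBytes = *Size;
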
     726             : 
     727             :   /// \return How much before a load we should place the prefetch instruction.
     728             :   /// This is currently measured in number of instructions.
     729             :   unsigned getPrefetchDistance() const;
     730             : 
     731             :   /// \return Some HW prefetchers can handle accesses up to a certain constant
     732             :   /// stride.  This is the minimum stride in bytes where it makes sense to start
     733             :   /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
     734             :   unsigned getMinPrefetchStride() const;
     735             : 
     736             :   /// \return The maximum number of iterations to prefetch ahead.  If the
     737             :   /// required number of iterations is more than this number, no prefetching is
     738             :   /// performed.
     739             :   unsigned getMaxPrefetchIterationsAhead() const;
     740             : 
     741             :   /// \return The maximum interleave factor that any transform should try to
     742             :   /// perform for this target. This number depends on the level of parallelism
     743             :   /// and the number of execution units in the CPU.
     744             :   unsigned getMaxInterleaveFactor(unsigned VF) const;
     745             : 
     746             :   /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
     747             :   OperandValueKind getOperandInfo(Value *V,
     748             :                                   OperandValueProperties &OpProps) const;
     749             : 
     750             :   /// This is an approximation of reciprocal throughput of a math/logic op.
     751             :   /// A higher cost indicates less expected throughput.
     752             :   /// From Agner Fog's guides, reciprocal throughput is "the average number of
     753             :   /// clock cycles per instruction when the instructions are not part of a
     754             :   /// limiting dependency chain."
     755             :   /// Therefore, costs should be scaled to account for multiple execution units
     756             :   /// on the target that can process this type of instruction. For example, if
     757             :   /// there are 5 scalar integer units and 2 vector integer units that can
     758             :   /// calculate an 'add' in a single cycle, this model should indicate that the
     759             :   /// cost of the vector add instruction is 2.5 times the cost of the scalar
     760             :   /// add instruction.
     761             :   /// \p Args is an optional argument which holds the instruction operands
     762             :   /// values so the TTI can analyze those values searching for special
     763             :   /// cases or optimizations based on those values.
     764             :   int getArithmeticInstrCost(
     765             :       unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
     766             :       OperandValueKind Opd2Info = OK_AnyValue,
     767             :       OperandValueProperties Opd1PropInfo = OP_None,
     768             :       OperandValueProperties Opd2PropInfo = OP_None,
     769             :       ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
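
  Putting the last two interfaces together, a vectorizer-style query for a
  multiply whose second operand may be a splatted power-of-two constant (a
  sketch; VecTy and Op2 are assumptions, with Op2 a Value *):

    TargetTransformInfo::OperandValueProperties Props =
        TargetTransformInfo::OP_None;
    TargetTransformInfo::OperandValueKind Kind =
        TTI.getOperandInfo(Op2, Props);
    int Cost = TTI.getArithmeticInstrCost(
        Instruction::Mul, VecTy, TargetTransformInfo::OK_AnyValue, Kind,
        TargetTransformInfo::OP_None, Props);
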
     770             : 
     771             :   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
     772             :   /// The index and subtype parameters are used by the subvector insertion and
     773             :   /// extraction shuffle kinds.
     774             :   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
     775             :                      Type *SubTp = nullptr) const;
     776             : 
     777             :   /// \return The expected cost of cast instructions, such as bitcast, trunc,
     778             :   /// zext, etc. If there is an existing instruction that holds Opcode, it
     779             :   /// may be passed in the 'I' parameter.
     780             :   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     781             :                        const Instruction *I = nullptr) const;
     782             : 
     783             :   /// \return The expected cost of a sign- or zero-extended vector extract. Use
     784             :   /// -1 to indicate that there is no information about the index value.
     785             :   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
     786             :                                unsigned Index = -1) const;
     787             : 
     788             :   /// \return The expected cost of control-flow related instructions such as
     789             :   /// Phi, Ret, Br.
     790             :   int getCFInstrCost(unsigned Opcode) const;
     791             : 
     792             :   /// \returns The expected cost of compare and select instructions. If there
     793             :   /// is an existing instruction that holds Opcode, it may be passed in the
     794             :   /// 'I' parameter.
     795             :   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     796             :                  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
     797             : 
     798             :   /// \return The expected cost of vector Insert and Extract.
     799             :   /// Use -1 to indicate that there is no information on the index value.
     800             :   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
     801             : 
     802             :   /// \return The cost of Load and Store instructions.
     803             :   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
     804             :                       unsigned AddressSpace, const Instruction *I = nullptr) const;
     805             : 
     806             :   /// \return The cost of masked Load and Store instructions.
     807             :   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
     808             :                             unsigned AddressSpace) const;
     809             : 
     810             :   /// \return The cost of Gather or Scatter operation
     811             :   /// \p Opcode - the kind of memory access (Load or Store)
     812             :   /// \p DataTy - a vector type of the data to be loaded or stored
     813             :   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
     814             :   /// \p VariableMask - true when the memory access is predicated with a mask
     815             :   ///                   that is not a compile-time constant
     816             :   /// \p Alignment - alignment of single element
     817             :   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
     818             :                              bool VariableMask, unsigned Alignment) const;
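                      : 
                      :   // An illustrative query for a gather predicated by a non-constant mask;
                      :   // TTI, DataTy and PtrOperand are hypothetical caller-provided values.
                      :   //
                      :   //   int Cost = TTI.getGatherScatterOpCost(Instruction::Load, DataTy,
                      :   //                                         PtrOperand, /*VariableMask=*/true,
                      :   //                                         /*Alignment=*/8);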
     819             : 
      820             :   /// \return The cost of the interleaved memory operation.
      821             :   /// \p Opcode is the memory operation code.
      822             :   /// \p VecTy is the vector type of the interleaved access.
      823             :   /// \p Factor is the interleave factor.
      824             :   /// \p Indices are the member indices for an interleaved load (an
      825             :   ///    interleaved load may have gaps).
      826             :   /// \p Alignment is the alignment of the memory operation.
      827             :   /// \p AddressSpace is the address space of the pointer.
      828             :   /// \p IsMasked indicates whether the memory access is predicated.
     829             :   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
     830             :                                  ArrayRef<unsigned> Indices, unsigned Alignment,
     831             :                                  unsigned AddressSpace, 
     832             :                                  bool IsMasked = false) const;
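                      : 
                      :   // A sketch of costing a stride-2 interleaved load that uses only member 0
                      :   // (member 1 is a gap); TTI and WideVecTy are hypothetical values.
                      :   //
                      :   //   unsigned Members[] = {0};
                      :   //   int Cost = TTI.getInterleavedMemoryOpCost(Instruction::Load, WideVecTy,
                      :   //                                             /*Factor=*/2, Members,
                      :   //                                             /*Alignment=*/4,
                      :   //                                             /*AddressSpace=*/0);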
     833             : 
     834             :   /// Calculate the cost of performing a vector reduction.
     835             :   ///
     836             :   /// This is the cost of reducing the vector value of type \p Ty to a scalar
     837             :   /// value using the operation denoted by \p Opcode. The form of the reduction
     838             :   /// can either be a pairwise reduction or a reduction that splits the vector
     839             :   /// at every reduction level.
     840             :   ///
     841             :   /// Pairwise:
     842             :   ///  (v0, v1, v2, v3)
     843             :   ///  ((v0+v1), (v2+v3), undef, undef)
     844             :   /// Split:
     845             :   ///  (v0, v1, v2, v3)
     846             :   ///  ((v0+v2), (v1+v3), undef, undef)
     847             :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
     848             :                                  bool IsPairwiseForm) const;
     849             :   int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
     850             :                              bool IsUnsigned) const;
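                      : 
                      :   // For example, a vectorizer could cost both reduction forms and keep the
                      :   // cheaper one (a sketch; TTI and VecTy are hypothetical caller values):
                      :   //
                      :   //   int Pairwise = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
                      :   //                                                 /*IsPairwiseForm=*/true);
                      :   //   int Split = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
                      :   //                                              /*IsPairwiseForm=*/false);
                      :   //   bool PreferPairwise = Pairwise < Split;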
     851             : 
      852             :   /// \returns The cost of Intrinsic instructions, analyzing the actual
      853             :   /// arguments. Three cases are handled: (1) a scalar instruction, (2) a vector
      854             :   /// instruction, and (3) a scalar instruction to be vectorized with factor VF.
     855             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     856             :                             ArrayRef<Value *> Args, FastMathFlags FMF,
     857             :                             unsigned VF = 1) const;
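                      : 
                      :   // Illustrative use of case 3 above: costing a scalar llvm.fmuladd call as
                      :   // if vectorized by a factor of 4. RetTy, Args and FMF are hypothetical.
                      :   //
                      :   //   int Cost = TTI.getIntrinsicInstrCost(Intrinsic::fmuladd, RetTy, Args,
                      :   //                                        FMF, /*VF=*/4);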
     858             : 
      859             :   /// \returns The cost of Intrinsic instructions, based on argument types only.
     860             :   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
     861             :   /// arguments and the return value will be computed based on types.
     862             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     863             :                             ArrayRef<Type *> Tys, FastMathFlags FMF,
     864             :                             unsigned ScalarizationCostPassed = UINT_MAX) const;
     865             : 
     866             :   /// \returns The cost of Call instructions.
     867             :   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
     868             : 
     869             :   /// \returns The number of pieces into which the provided type must be
     870             :   /// split during legalization. Zero is returned when the answer is unknown.
     871             :   unsigned getNumberOfParts(Type *Tp) const;
     872             : 
     873             :   /// \returns The cost of the address computation. For most targets this can be
     874             :   /// merged into the instruction indexing mode. Some targets might want to
     875             :   /// distinguish between address computation for memory operations on vector
     876             :   /// types and scalar types. Such targets should override this function.
      877             :   /// The 'SE' parameter holds a pointer to the scalar evolution object, which
      878             :   /// is used to obtain the step value of Ptr in the case of a constant stride.
      879             :   /// The 'Ptr' parameter holds the SCEV of the access pointer.
     880             :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
     881             :                                 const SCEV *Ptr = nullptr) const;
     882             : 
     883             :   /// \returns The cost, if any, of keeping values of the given types alive
     884             :   /// over a callsite.
     885             :   ///
     886             :   /// Some types may require the use of register classes that do not have
      887             :   /// any callee-saved registers, and so would require a spill and fill.
     888             :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
     889             : 
      890             :   /// \returns True if the intrinsic is a supported memory intrinsic. Info
      891             :   /// will contain additional information: whether the intrinsic may read
      892             :   /// from or write to memory, its volatility, and the pointer. Info is
      893             :   /// undefined if false is returned.
     894             :   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
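                      : 
                      :   // The intended usage pattern checks the return value before reading Info,
                      :   // since Info is undefined on failure (a sketch; Inst is hypothetical):
                      :   //
                      :   //   MemIntrinsicInfo Info;
                      :   //   if (TTI.getTgtMemIntrinsic(Inst, Info) && Info.PtrVal)
                      :   //     ; // Info's fields are only meaningful on this path.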
     895             : 
     896             :   /// \returns The maximum element size, in bytes, for an element
     897             :   /// unordered-atomic memory intrinsic.
     898             :   unsigned getAtomicMemIntrinsicMaxElementSize() const;
     899             : 
     900             :   /// \returns A value which is the result of the given memory intrinsic.  New
     901             :   /// instructions may be created to extract the result from the given intrinsic
     902             :   /// memory operation.  Returns nullptr if the target cannot create a result
     903             :   /// from the given intrinsic.
     904             :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
     905             :                                            Type *ExpectedType) const;
     906             : 
     907             :   /// \returns The type to use in a loop expansion of a memcpy call.
     908             :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
     909             :                                   unsigned SrcAlign, unsigned DestAlign) const;
     910             : 
      911             :   /// \param[out] OpsOut The operand types used to copy \p RemainingBytes of memory.
     912             :   /// \param RemainingBytes The number of bytes to copy.
     913             :   ///
     914             :   /// Calculates the operand types to use when copying \p RemainingBytes of
     915             :   /// memory, where source and destination alignments are \p SrcAlign and
     916             :   /// \p DestAlign respectively.
     917             :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
     918             :                                          LLVMContext &Context,
     919             :                                          unsigned RemainingBytes,
     920             :                                          unsigned SrcAlign,
     921             :                                          unsigned DestAlign) const;
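                      : 
                      :   // A sketch of how a memcpy expansion might combine the two hooks above;
                      :   // Ctx, Length and the alignment values are hypothetical, and SmallVector
                      :   // comes from llvm/ADT/SmallVector.h.
                      :   //
                      :   //   Type *OpTy = TTI.getMemcpyLoopLoweringType(Ctx, Length, /*SrcAlign=*/4,
                      :   //                                              /*DestAlign=*/4);
                      :   //   SmallVector<Type *, 4> Residual;
                      :   //   TTI.getMemcpyLoopResidualLoweringType(Residual, Ctx,
                      :   //                                         /*RemainingBytes=*/3,
                      :   //                                         /*SrcAlign=*/4, /*DestAlign=*/4);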
     922             : 
     923             :   /// \returns True if the two functions have compatible attributes for inlining
     924             :   /// purposes.
     925             :   bool areInlineCompatible(const Function *Caller,
     926             :                            const Function *Callee) const;
     927             : 
     928             :   /// The type of load/store indexing.
     929             :   enum MemIndexedMode {
     930             :     MIM_Unindexed,  ///< No indexing.
     931             :     MIM_PreInc,     ///< Pre-incrementing.
     932             :     MIM_PreDec,     ///< Pre-decrementing.
     933             :     MIM_PostInc,    ///< Post-incrementing.
     934             :     MIM_PostDec     ///< Post-decrementing.
     935             :   };
     936             : 
     937             :   /// \returns True if the specified indexed load for the given type is legal.
     938             :   bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
     939             : 
     940             :   /// \returns True if the specified indexed store for the given type is legal.
     941             :   bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
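                      : 
                      :   // For instance, a pass forming post-incrementing accesses could first ask
                      :   // (a sketch; Ty is a hypothetical element type):
                      :   //
                      :   //   if (TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc, Ty))
                      :   //     ; // Safe to form a post-incrementing load of Ty.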
     942             : 
     943             :   /// \returns The bitwidth of the largest vector type that should be used to
     944             :   /// load/store in the given address space.
     945             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     946             : 
     947             :   /// \returns True if the load instruction is legal to vectorize.
     948             :   bool isLegalToVectorizeLoad(LoadInst *LI) const;
     949             : 
     950             :   /// \returns True if the store instruction is legal to vectorize.
     951             :   bool isLegalToVectorizeStore(StoreInst *SI) const;
     952             : 
     953             :   /// \returns True if it is legal to vectorize the given load chain.
     954             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     955             :                                    unsigned Alignment,
     956             :                                    unsigned AddrSpace) const;
     957             : 
     958             :   /// \returns True if it is legal to vectorize the given store chain.
     959             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     960             :                                     unsigned Alignment,
     961             :                                     unsigned AddrSpace) const;
     962             : 
      963             :   /// \returns The new vector factor value if the target doesn't support
      964             :   /// loads of \p LoadSize bytes or has a better vector factor.
     965             :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
     966             :                                unsigned ChainSizeInBytes,
     967             :                                VectorType *VecTy) const;
     968             : 
      969             :   /// \returns The new vector factor value if the target doesn't support
      970             :   /// stores of \p StoreSize bytes or has a better vector factor.
     971             :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
     972             :                                 unsigned ChainSizeInBytes,
     973             :                                 VectorType *VecTy) const;
     974             : 
     975             :   /// Flags describing the kind of vector reduction.
     976             :   struct ReductionFlags {
     977         203 :     ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
      978             :     bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
     979             :     bool IsSigned; ///< Whether the operation is a signed int reduction.
     980             :     bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
     981             :   };
     982             : 
     983             :   /// \returns True if the target wants to handle the given reduction idiom in
     984             :   /// the intrinsics form instead of the shuffle form.
     985             :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
     986             :                              ReductionFlags Flags) const;
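                      : 
                      :   // Illustrative query for a signed integer max reduction. Passing
                      :   // Instruction::ICmp as the opcode for integer min/max is an assumption
                      :   // here; Ty is a hypothetical vector type.
                      :   //
                      :   //   TargetTransformInfo::ReductionFlags Flags;
                      :   //   Flags.IsMaxOp = true;
                      :   //   Flags.IsSigned = true;
                      :   //   bool UseIntrinsic = TTI.useReductionIntrinsic(Instruction::ICmp, Ty,
                      :   //                                                 Flags);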
     987             : 
     988             :   /// \returns True if the target wants to expand the given reduction intrinsic
     989             :   /// into a shuffle sequence.
     990             :   bool shouldExpandReduction(const IntrinsicInst *II) const;
     991             :   /// @}
     992             : 
     993             : private:
     994             :   /// Estimate the latency of specified instruction.
     995             :   /// Returns 1 as the default value.
     996             :   int getInstructionLatency(const Instruction *I) const;
     997             : 
     998             :   /// Returns the expected throughput cost of the instruction.
     999             :   /// Returns -1 if the cost is unknown.
    1000             :   int getInstructionThroughput(const Instruction *I) const;
    1001             : 
    1002             :   /// The abstract base class used to type erase specific TTI
    1003             :   /// implementations.
    1004             :   class Concept;
    1005             : 
    1006             :   /// The template model for the base class which wraps a concrete
    1007             :   /// implementation in a type erased interface.
    1008             :   template <typename T> class Model;
    1009             : 
    1010             :   std::unique_ptr<Concept> TTIImpl;
    1011             : };
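                      : 
                      : // The Concept/Model pair below is the classic type-erasure idiom: a small
                      : // virtual interface plus a template wrapper that forwards to a concrete,
                      : // non-virtual implementation, mirroring the TTIImpl member above. A minimal
                      : // standalone sketch of the same pattern (hypothetical names; assumes
                      : // <memory> and <utility>):
                      : //
                      : //   class AnyCost {
                      : //     struct Concept {
                      : //       virtual ~Concept() = default;
                      : //       virtual int cost() = 0;
                      : //     };
                      : //     template <typename T> struct Model final : Concept {
                      : //       T Impl;
                      : //       Model(T I) : Impl(std::move(I)) {}
                      : //       int cost() override { return Impl.cost(); }
                      : //     };
                      : //     std::unique_ptr<Concept> Ptr;
                      : //
                      : //   public:
                      : //     template <typename T> AnyCost(T I) : Ptr(new Model<T>(std::move(I))) {}
                      : //     int cost() { return Ptr->cost(); }
                      : //   };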
    1012             : 
    1013             : class TargetTransformInfo::Concept {
    1014             : public:
    1015             :   virtual ~Concept() = 0;
    1016             :   virtual const DataLayout &getDataLayout() const = 0;
    1017             :   virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
    1018             :   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
    1019             :                          ArrayRef<const Value *> Operands) = 0;
    1020             :   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
    1021             :   virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
    1022             :   virtual int getCallCost(const Function *F, int NumArgs) = 0;
    1023             :   virtual int getCallCost(const Function *F,
    1024             :                           ArrayRef<const Value *> Arguments) = 0;
    1025             :   virtual unsigned getInliningThresholdMultiplier() = 0;
    1026             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1027             :                                ArrayRef<Type *> ParamTys) = 0;
    1028             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1029             :                                ArrayRef<const Value *> Arguments) = 0;
    1030             :   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
    1031             :                                                     unsigned &JTSize) = 0;
    1032             :   virtual int
    1033             :   getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
    1034             :   virtual bool hasBranchDivergence() = 0;
    1035             :   virtual bool isSourceOfDivergence(const Value *V) = 0;
    1036             :   virtual bool isAlwaysUniform(const Value *V) = 0;
    1037             :   virtual unsigned getFlatAddressSpace() = 0;
    1038             :   virtual bool isLoweredToCall(const Function *F) = 0;
    1039             :   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
    1040             :                                        UnrollingPreferences &UP) = 0;
    1041             :   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
    1042             :   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
    1043             :   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
    1044             :                                      int64_t BaseOffset, bool HasBaseReg,
    1045             :                                      int64_t Scale,
    1046             :                                      unsigned AddrSpace,
    1047             :                                      Instruction *I) = 0;
    1048             :   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
    1049             :                              TargetTransformInfo::LSRCost &C2) = 0;
    1050             :   virtual bool canMacroFuseCmp() = 0;
    1051             :   virtual bool shouldFavorPostInc() const = 0;
    1052             :   virtual bool isLegalMaskedStore(Type *DataType) = 0;
    1053             :   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
    1054             :   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
    1055             :   virtual bool isLegalMaskedGather(Type *DataType) = 0;
    1056             :   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
    1057             :   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
    1058             :   virtual bool prefersVectorizedAddressing() = 0;
    1059             :   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
    1060             :                                    int64_t BaseOffset, bool HasBaseReg,
    1061             :                                    int64_t Scale, unsigned AddrSpace) = 0;
    1062             :   virtual bool LSRWithInstrQueries() = 0;
    1063             :   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
    1064             :   virtual bool isProfitableToHoist(Instruction *I) = 0;
    1065             :   virtual bool useAA() = 0;
    1066             :   virtual bool isTypeLegal(Type *Ty) = 0;
    1067             :   virtual unsigned getJumpBufAlignment() = 0;
    1068             :   virtual unsigned getJumpBufSize() = 0;
    1069             :   virtual bool shouldBuildLookupTables() = 0;
    1070             :   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
    1071             :   virtual bool useColdCCForColdCall(Function &F) = 0;
    1072             :   virtual unsigned
    1073             :   getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
    1074             :   virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1075             :                                                     unsigned VF) = 0;
    1076             :   virtual bool supportsEfficientVectorElementLoadStore() = 0;
    1077             :   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
    1078             :   virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
    1079             :       bool IsZeroCmp) const = 0;
    1080             :   virtual bool enableInterleavedAccessVectorization() = 0;
    1081             :   virtual bool enableMaskedInterleavedAccessVectorization() = 0;
    1082             :   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
    1083             :   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1084             :                                               unsigned BitWidth,
    1085             :                                               unsigned AddressSpace,
    1086             :                                               unsigned Alignment,
    1087             :                                               bool *Fast) = 0;
    1088             :   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
    1089             :   virtual bool haveFastSqrt(Type *Ty) = 0;
    1090             :   virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
    1091             :   virtual int getFPOpCost(Type *Ty) = 0;
    1092             :   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1093             :                                     Type *Ty) = 0;
    1094             :   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
    1095             :   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1096             :                             Type *Ty) = 0;
    1097             :   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1098             :                             Type *Ty) = 0;
    1099             :   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
    1100             :   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
    1101             :   virtual unsigned getMinVectorRegisterBitWidth() = 0;
    1102             :   virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
    1103             :   virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
    1104             :   virtual bool shouldConsiderAddressTypePromotion(
    1105             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
    1106             :   virtual unsigned getCacheLineSize() = 0;
    1107             :   virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
    1108             :   virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
    1109             :   virtual unsigned getPrefetchDistance() = 0;
    1110             :   virtual unsigned getMinPrefetchStride() = 0;
    1111             :   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
    1112             :   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
    1113             :   virtual unsigned
    1114             :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1115             :                          OperandValueKind Opd2Info,
    1116             :                          OperandValueProperties Opd1PropInfo,
    1117             :                          OperandValueProperties Opd2PropInfo,
    1118             :                          ArrayRef<const Value *> Args) = 0;
    1119             :   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1120             :                              Type *SubTp) = 0;
    1121             :   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1122             :                                const Instruction *I) = 0;
    1123             :   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
    1124             :                                        VectorType *VecTy, unsigned Index) = 0;
    1125             :   virtual int getCFInstrCost(unsigned Opcode) = 0;
    1126             :   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    1127             :                                 Type *CondTy, const Instruction *I) = 0;
    1128             :   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
    1129             :                                  unsigned Index) = 0;
    1130             :   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1131             :                               unsigned AddressSpace, const Instruction *I) = 0;
    1132             :   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    1133             :                                     unsigned Alignment,
    1134             :                                     unsigned AddressSpace) = 0;
    1135             :   virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1136             :                                      Value *Ptr, bool VariableMask,
    1137             :                                      unsigned Alignment) = 0;
    1138             :   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
    1139             :                                          unsigned Factor,
    1140             :                                          ArrayRef<unsigned> Indices,
    1141             :                                          unsigned Alignment,
    1142             :                                          unsigned AddressSpace,
    1143             :                                          bool IsMasked = false) = 0;
    1144             :   virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1145             :                                          bool IsPairwiseForm) = 0;
    1146             :   virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1147             :                                      bool IsPairwiseForm, bool IsUnsigned) = 0;
    1148             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1149             :                       ArrayRef<Type *> Tys, FastMathFlags FMF,
    1150             :                       unsigned ScalarizationCostPassed) = 0;
    1151             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1152             :          ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
    1153             :   virtual int getCallInstrCost(Function *F, Type *RetTy,
    1154             :                                ArrayRef<Type *> Tys) = 0;
    1155             :   virtual unsigned getNumberOfParts(Type *Tp) = 0;
    1156             :   virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1157             :                                         const SCEV *Ptr) = 0;
    1158             :   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
    1159             :   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1160             :                                   MemIntrinsicInfo &Info) = 0;
    1161             :   virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
    1162             :   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1163             :                                                    Type *ExpectedType) = 0;
    1164             :   virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1165             :                                           unsigned SrcAlign,
    1166             :                                           unsigned DestAlign) const = 0;
    1167             :   virtual void getMemcpyLoopResidualLoweringType(
    1168             :       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    1169             :       unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
    1170             :   virtual bool areInlineCompatible(const Function *Caller,
    1171             :                                    const Function *Callee) const = 0;
    1172             :   virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
     1173             :   virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
    1174             :   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
    1175             :   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
    1176             :   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
    1177             :   virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1178             :                                            unsigned Alignment,
    1179             :                                            unsigned AddrSpace) const = 0;
    1180             :   virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1181             :                                             unsigned Alignment,
    1182             :                                             unsigned AddrSpace) const = 0;
    1183             :   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1184             :                                        unsigned ChainSizeInBytes,
    1185             :                                        VectorType *VecTy) const = 0;
    1186             :   virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1187             :                                         unsigned ChainSizeInBytes,
    1188             :                                         VectorType *VecTy) const = 0;
    1189             :   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1190             :                                      ReductionFlags) const = 0;
    1191             :   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
    1192             :   virtual int getInstructionLatency(const Instruction *I) = 0;
    1193             : };
    1194             : 
    1195             : template <typename T>
    1196             : class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
    1197             :   T Impl;
    1198             : 
    1199             : public:
    1200     3073624 :   Model(T Impl) : Impl(std::move(Impl)) {}
    1201     3932259 :   ~Model() override {}
    1202             : 
    1203           0 :   const DataLayout &getDataLayout() const override {
    1204        7637 :     return Impl.getDataLayout();
    1205             :   }
    1206           0 : 
    1207           0 :   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    1208           0 :     return Impl.getOperationCost(Opcode, Ty, OpTy);
    1209           0 :   }
    1210         369 :   int getGEPCost(Type *PointeeType, const Value *Ptr,
    1211             :                  ArrayRef<const Value *> Operands) override {
    1212         369 :     return Impl.getGEPCost(PointeeType, Ptr, Operands);
    1213           0 :   }
    1214           0 :   int getExtCost(const Instruction *I, const Value *Src) override {
    1215           0 :     return Impl.getExtCost(I, Src);
    1216           0 :   }
    1217           0 :   int getCallCost(FunctionType *FTy, int NumArgs) override {
    1218           0 :     return Impl.getCallCost(FTy, NumArgs);
    1219           0 :   }
    1220           0 :   int getCallCost(const Function *F, int NumArgs) override {
    1221           0 :     return Impl.getCallCost(F, NumArgs);
    1222       34068 :   }
    1223           0 :   int getCallCost(const Function *F,
    1224       34068 :                   ArrayRef<const Value *> Arguments) override {
    1225           0 :     return Impl.getCallCost(F, Arguments);
    1226        1823 :   }
    1227      288234 :   unsigned getInliningThresholdMultiplier() override {
    1228      290057 :     return Impl.getInliningThresholdMultiplier();
    1229             :   }
    1230       32245 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1231             :                        ArrayRef<Type *> ParamTys) override {
    1232       32245 :     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
    1233             :   }
    1234           4 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1235           0 :                        ArrayRef<const Value *> Arguments) override {
    1236           4 :     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
    1237           0 :   }
    1238     4065703 :   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    1239     4065703 :     return Impl.getUserCost(U, Operands);
    1240           0 :   }
    1241       44000 :   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
    1242          90 :   bool isSourceOfDivergence(const Value *V) override {
    1243          90 :     return Impl.isSourceOfDivergence(V);
    1244           0 :   }
    1245             : 
    1246          28 :   bool isAlwaysUniform(const Value *V) override {
    1247          28 :     return Impl.isAlwaysUniform(V);
    1248             :   }
    1249           0 : 
    1250        1056 :   unsigned getFlatAddressSpace() override {
    1251        1056 :     return Impl.getFlatAddressSpace();
    1252           0 :   }
    1253           0 : 
    1254      997312 :   bool isLoweredToCall(const Function *F) override {
    1255      997312 :     return Impl.isLoweredToCall(F);
    1256           0 :   }
    1257        8845 :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
    1258           0 :                                UnrollingPreferences &UP) override {
    1259        8845 :     return Impl.getUnrollingPreferences(L, SE, UP);
    1260             :   }
    1261       25984 :   bool isLegalAddImmediate(int64_t Imm) override {
    1262       25984 :     return Impl.isLegalAddImmediate(Imm);
    1263           0 :   }
    1264       28650 :   bool isLegalICmpImmediate(int64_t Imm) override {
    1265       28650 :     return Impl.isLegalICmpImmediate(Imm);
    1266             :   }
    1267      570583 :   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1268             :                              bool HasBaseReg, int64_t Scale,
    1269           0 :                              unsigned AddrSpace,
    1270             :                              Instruction *I) override {
    1271           0 :     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
    1272      570583 :                                       Scale, AddrSpace, I);
    1273         112 :   }
    1274      120262 :   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
    1275             :                      TargetTransformInfo::LSRCost &C2) override {
    1276      120150 :     return Impl.isLSRCostLess(C1, C2);
    1277           0 :   }
    1278       34139 :   bool canMacroFuseCmp() override {
    1279       34251 :     return Impl.canMacroFuseCmp();
    1280         112 :   }
    1281      256451 :   bool shouldFavorPostInc() const override {
    1282      256451 :     return Impl.shouldFavorPostInc();
    1283             :   }
    1284         553 :   bool isLegalMaskedStore(Type *DataType) override {
    1285         553 :     return Impl.isLegalMaskedStore(DataType);
    1286           0 :   }
    1287         521 :   bool isLegalMaskedLoad(Type *DataType) override {
    1288         521 :     return Impl.isLegalMaskedLoad(DataType);
    1289             :   }
    1290         675 :   bool isLegalMaskedScatter(Type *DataType) override {
    1291         675 :     return Impl.isLegalMaskedScatter(DataType);
    1292           0 :   }
    1293         820 :   bool isLegalMaskedGather(Type *DataType) override {
    1294         820 :     return Impl.isLegalMaskedGather(DataType);
    1295             :   }
    1296          66 :   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    1297          66 :     return Impl.hasDivRemOp(DataType, IsSigned);
    1298           0 :   }
    1299         330 :   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    1300         330 :     return Impl.hasVolatileVariant(I, AddrSpace);
    1301             :   }
    1302        1407 :   bool prefersVectorizedAddressing() override {
    1303        1407 :     return Impl.prefersVectorizedAddressing();
    1304           0 :   }
    1305      125812 :   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1306        8969 :                            bool HasBaseReg, int64_t Scale,
    1307        8969 :                            unsigned AddrSpace) override {
    1308             :     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
    1309      126194 :                                      Scale, AddrSpace);
    1310         382 :   }
    1311       71140 :   bool LSRWithInstrQueries() override {
    1312       79727 :     return Impl.LSRWithInstrQueries();
    1313        8587 :   }
    1314       20899 :   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    1315      121881 :     return Impl.isTruncateFree(Ty1, Ty2);
    1316     1444647 :   }
    1317     1450538 :   bool isProfitableToHoist(Instruction *I) override {
    1318        5891 :     return Impl.isProfitableToHoist(I);
    1319           0 :   }
    1320         552 :   bool useAA() override { return Impl.useAA(); }
    1321         767 :   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
    1322     1444647 :   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
    1323     1444647 :   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
    1324        1279 :   bool shouldBuildLookupTables() override {
    1325        1279 :     return Impl.shouldBuildLookupTables();
    1326      254054 :   }
    1327      257649 :   bool shouldBuildLookupTablesForConstant(Constant *C) override {
    1328        3595 :     return Impl.shouldBuildLookupTablesForConstant(C);
    1329           0 :   }
    1330           5 :   bool useColdCCForColdCall(Function &F) override {
    1331           5 :     return Impl.useColdCCForColdCall(F);
    1332      254054 :   }
    1333      254054 : 
    1334         837 :   unsigned getScalarizationOverhead(Type *Ty, bool Insert,
    1335             :                                     bool Extract) override {
    1336       23061 :     return Impl.getScalarizationOverhead(Ty, Insert, Extract);
    1337       22224 :   }
    1338         885 :   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1339        2382 :                                             unsigned VF) override {
    1340        3267 :     return Impl.getOperandsScalarizationOverhead(Args, VF);
    1341             :   }
    1342       19842 : 
    1343       20334 :   bool supportsEfficientVectorElementLoadStore() override {
    1344         492 :     return Impl.supportsEfficientVectorElementLoadStore();
    1345             :   }
    1346         363 : 
    1347         445 :   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    1348          82 :     return Impl.enableAggressiveInterleaving(LoopHasReductions);
    1349           0 :   }
    1350      176872 :   const MemCmpExpansionOptions *enableMemCmpExpansion(
    1351             :       bool IsZeroCmp) const override {
    1352      177235 :     return Impl.enableMemCmpExpansion(IsZeroCmp);
    1353         363 :   }
    1354         979 :   bool enableInterleavedAccessVectorization() override {
    1355         999 :     return Impl.enableInterleavedAccessVectorization();
    1356             :   }
    1357         490 :   bool enableMaskedInterleavedAccessVectorization() override {
    1358         470 :     return Impl.enableMaskedInterleavedAccessVectorization();
    1359           3 :   }
    1360         158 :   bool isFPVectorizationPotentiallyUnsafe() override {
    1361         161 :     return Impl.isFPVectorizationPotentiallyUnsafe();
    1362             :   }
    1363          62 :   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1364             :                                       unsigned BitWidth, unsigned AddressSpace,
    1365          17 :                                       unsigned Alignment, bool *Fast) override {
    1366             :     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
    1367         365 :                                                Alignment, Fast);
    1368         320 :   }
    1369        3577 :   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    1370        3591 :     return Impl.getPopcntSupport(IntTyWidthInBit);
    1371          14 :   }
    1372          58 :   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
    1373         306 : 
    1374         333 :   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    1375          27 :     return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
    1376         544 :   }
    1377         544 : 
    1378         535 :   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
    1379          74 : 
    1380         256 :   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1381             :                             Type *Ty) override {
    1382         652 :     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
    1383         470 :   }
    1384          84 :   int getIntImmCost(const APInt &Imm, Type *Ty) override {
    1385       11110 :     return Impl.getIntImmCost(Imm, Ty);
    1386             :   }
    1387      516391 :   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1388             :                     Type *Ty) override {
    1389      516456 :     return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
    1390       11026 :   }
    1391      154016 :   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1392         523 :                     Type *Ty) override {
    1393      154020 :     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
    1394             :   }
    1395      242165 :   unsigned getNumberOfRegisters(bool Vector) override {
    1396      242165 :     return Impl.getNumberOfRegisters(Vector);
    1397         523 :   }
    1398       13652 :   unsigned getRegisterBitWidth(bool Vector) const override {
    1399       24155 :     return Impl.getRegisterBitWidth(Vector);
    1400             :   }
    1401       11812 :   unsigned getMinVectorRegisterBitWidth() override {
    1402       11892 :     return Impl.getMinVectorRegisterBitWidth();
    1403             :   }
    1404       11406 :   bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    1405         903 :     return Impl.shouldMaximizeVectorBandwidth(OptSize);
    1406         942 :   }
    1407           2 :   unsigned getMinimumVF(unsigned ElemWidth) const override {
    1408         944 :     return Impl.getMinimumVF(ElemWidth);
    1409             :   }
    1410       32528 :   bool shouldConsiderAddressTypePromotion(
    1411             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    1412          74 :     return Impl.shouldConsiderAddressTypePromotion(
    1413       32454 :         I, AllowPromotionWithoutCommonHeader);
    1414         868 :   }
    1415          51 :   unsigned getCacheLineSize() override {
    1416         919 :     return Impl.getCacheLineSize();
    1417             :   }
    1418         347 :   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
    1419         347 :     return Impl.getCacheSize(Level);
    1420             :   }
    1421          49 :   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
    1422          49 :     return Impl.getCacheAssociativity(Level);
    1423             :   }
    1424       21855 :   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
    1425         378 :   unsigned getMinPrefetchStride() override {
    1426          78 :     return Impl.getMinPrefetchStride();
    1427        2797 :   }
    1428        3026 :   unsigned getMaxPrefetchIterationsAhead() override {
    1429         229 :     return Impl.getMaxPrefetchIterationsAhead();
    1430         178 :   }
    1431        2189 :   unsigned getMaxInterleaveFactor(unsigned VF) override {
    1432        2044 :     return Impl.getMaxInterleaveFactor(VF);
    1433        2619 :   }
    1434       10615 :   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
    1435             :                                             unsigned &JTSize) override {
    1436        7998 :     return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
    1437           2 :   }
    1438             :   unsigned
    1439      210389 :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1440           0 :                          OperandValueKind Opd2Info,
    1441             :                          OperandValueProperties Opd1PropInfo,
    1442           2 :                          OperandValueProperties Opd2PropInfo,
    1443           2 :                          ArrayRef<const Value *> Args) override {
    1444             :     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
    1445      210389 :                                        Opd1PropInfo, Opd2PropInfo, Args);
    1446           0 :   }
    1447        5312 :   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1448           0 :                      Type *SubTp) override {
    1449        5312 :     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
    1450             :   }
    1451        6903 :   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1452           0 :                        const Instruction *I) override {
    1453        6903 :     return Impl.getCastInstrCost(Opcode, Dst, Src, I);
    1454           2 :   }
    1455          34 :   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
    1456             :                                unsigned Index) override {
    1457          32 :     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
    1458           0 :   }
    1459       11273 :   int getCFInstrCost(unsigned Opcode) override {
    1460       11275 :     return Impl.getCFInstrCost(Opcode);
    1461           2 :   }
    1462        5550 :   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
    1463           0 :                          const Instruction *I) override {
    1464        5550 :     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
    1465             :   }
    1466       87172 :   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    1467       87172 :     return Impl.getVectorInstrCost(Opcode, Val, Index);
    1468             :   }
    1469      395277 :   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1470           0 :                       unsigned AddressSpace, const Instruction *I) override {
    1471      395277 :     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
    1472           0 :   }
    1473         141 :   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1474             :                             unsigned AddressSpace) override {
    1475         141 :     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
    1476           0 :   }
    1477         132 :   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1478           0 :                              Value *Ptr, bool VariableMask,
    1479           0 :                              unsigned Alignment) override {
    1480             :     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
    1481         401 :                                        Alignment);
    1482         269 :   }
    1483          92 :   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
    1484           0 :                                  ArrayRef<unsigned> Indices, unsigned Alignment,
    1485           0 :                                  unsigned AddressSpace, bool IsMasked) override {
    1486             :     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
    1487         361 :                                            Alignment, AddressSpace, IsMasked);
    1488         269 :   }
    1489         261 :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1490           3 :                                  bool IsPairwiseForm) override {
    1491         264 :     return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
    1492             :   }
    1493         898 :   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1494           0 :                              bool IsPairwiseForm, bool IsUnsigned) override {
    1495         898 :     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
     1496           3 :   }
    1497         949 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
    1498             :                FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    1499         444 :     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
    1500         946 :                                       ScalarizationCostPassed);
    1501             :   }
    1502        2837 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1503         444 :        ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    1504        2837 :     return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
    1505          10 :   }
    1506         554 :   int getCallInstrCost(Function *F, Type *RetTy,
    1507             :                        ArrayRef<Type *> Tys) override {
    1508         554 :     return Impl.getCallInstrCost(F, RetTy, Tys);
    1509          10 :   }
    1510       22477 :   unsigned getNumberOfParts(Type *Tp) override {
    1511       22911 :     return Impl.getNumberOfParts(Tp);
    1512             :   }
    1513        1582 :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1514             :                                 const SCEV *Ptr) override {
    1515        2016 :     return Impl.getAddressComputationCost(Ty, SE, Ptr);
    1516             :   }
    1517       14248 :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    1518       14248 :     return Impl.getCostOfKeepingLiveOverCall(Tys);
    1519             :   }
    1520      380973 :   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1521           9 :                           MemIntrinsicInfo &Info) override {
    1522      380964 :     return Impl.getTgtMemIntrinsic(Inst, Info);
    1523         453 :   }
    1524         462 :   unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    1525           9 :     return Impl.getAtomicMemIntrinsicMaxElementSize();
    1526         475 :   }
    1527         495 :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1528             :                                            Type *ExpectedType) override {
    1529          46 :     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
    1530          26 :   }
    1531          15 :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1532         449 :                                   unsigned SrcAlign,
    1533         449 :                                   unsigned DestAlign) const override {
    1534          15 :     return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
    1535           0 :   }
    1536           0 :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
    1537             :                                          LLVMContext &Context,
    1538           0 :                                          unsigned RemainingBytes,
    1539           0 :                                          unsigned SrcAlign,
    1540             :                                          unsigned DestAlign) const override {
    1541           0 :     Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
    1542           0 :                                            SrcAlign, DestAlign);
    1543           0 :   }
    1544      334696 :   bool areInlineCompatible(const Function *Caller,
    1545           2 :                            const Function *Callee) const override {
    1546      334696 :     return Impl.areInlineCompatible(Caller, Callee);
    1547           0 :   }
    1548        7649 :   bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    1549        7649 :     return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
    1550             :   }
    1551           6 :   bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    1552           6 :     return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
    1553             :   }
    1554         809 :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    1555         809 :     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
    1556             :   }
    1557         371 :   bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    1558         371 :     return Impl.isLegalToVectorizeLoad(LI);
    1559             :   }
    1560         332 :   bool isLegalToVectorizeStore(StoreInst *SI) const override {
    1561         332 :     return Impl.isLegalToVectorizeStore(SI);
    1562             :   }
    1563          34 :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1564           0 :                                    unsigned Alignment,
    1565             :                                    unsigned AddrSpace) const override {
    1566           0 :     return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
    1567          34 :                                             AddrSpace);
    1568             :   }
    1569          11 :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1570           0 :                                     unsigned Alignment,
    1571             :                                     unsigned AddrSpace) const override {
    1572           0 :     return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
    1573          11 :                                              AddrSpace);
    1574             :   }
    1575          94 :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1576           0 :                                unsigned ChainSizeInBytes,
    1577             :                                VectorType *VecTy) const override {
    1578          94 :     return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
    1579             :   }
    1580          42 :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1581             :                                 unsigned ChainSizeInBytes,
    1582           0 :                                 VectorType *VecTy) const override {
    1583          42 :     return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
    1584           0 :   }
    1585         306 :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1586           0 :                              ReductionFlags Flags) const override {
    1587         306 :     return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
    1588           0 :   }
    1589        1741 :   bool shouldExpandReduction(const IntrinsicInst *II) const override {
    1590        1741 :     return Impl.shouldExpandReduction(II);
    1591             :   }
    1592          11 :   int getInstructionLatency(const Instruction *I) override {
    1593          11 :     return Impl.getInstructionLatency(I);
    1594           0 :   }
    1595             : };
    1596           0 : 
    1597             : template <typename T>
    1598      190309 : TargetTransformInfo::TargetTransformInfo(T Impl)
    1599     2622743 :     : TTIImpl(new Model<T>(Impl)) {}
    1600             : 
    1601           0 : /// Analysis pass providing the \c TargetTransformInfo.
    1602           0 : ///
    1603             : /// The core idea of the TargetIRAnalysis is to expose an interface through
    1604           0 : /// which LLVM targets can analyze and provide information about the middle
    1605           0 : /// end's target-independent IR. This supports use cases such as target-aware
    1606             : /// cost modeling of IR constructs.
    1607           0 : ///
    1608           0 : /// This is a function analysis because much of the cost modeling for targets
    1609             : /// is done in a subtarget specific way and LLVM supports compiling different
    1610             : /// functions targeting different subtargets in order to support runtime
    1611           3 : /// dispatch according to the observed subtarget.
    1612           3 : class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
    1613             : public:
    1614           0 :   typedef TargetTransformInfo Result;
    1615           0 : 
    1616             :   /// Default construct a target IR analysis.
    1617           3 :   ///
    1618           3 :   /// This will use the module's datalayout to construct a baseline
    1619             :   /// conservative TTI result.
    1620       21444 :   TargetIRAnalysis();
    1621             : 
    1622       21444 :   /// Construct an IR analysis pass around a target-provided callback.
    1623             :   ///
    1624        2297 :   /// The callback will be called with a particular function for which the TTI
    1625             :   /// is needed and must return a TTI object for that function.
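  ///
  /// For example, a target would typically construct one as follows (a
  /// sketch; assumes a TargetMachine *TM in scope whose
  /// getTargetTransformInfo(F) builds the target's TTI):
  /// \code
  ///   TargetIRAnalysis TIRA([TM](const Function &F) {
  ///     return TM->getTargetTransformInfo(F);
  ///   });
  /// \endcode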
    1626        2297 :   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
    1627             : 
    1628       19147 :   // Value semantics. We spell out the constructors for MSVC.
    1629             :   TargetIRAnalysis(const TargetIRAnalysis &Arg)
    1630       19147 :       : TTICallback(Arg.TTICallback) {}
    1631             :   TargetIRAnalysis(TargetIRAnalysis &&Arg)
    1632           8 :       : TTICallback(std::move(Arg.TTICallback)) {}
    1633           8 :   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    1634             :     TTICallback = RHS.TTICallback;
    1635           0 :     return *this;
    1636           0 :   }
    1637             :   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    1638           8 :     TTICallback = std::move(RHS.TTICallback);
    1639           8 :     return *this;
    1640             :   }
    1641           0 : 
    1642           0 :   Result run(const Function &F, FunctionAnalysisManager &);
    1643             : 
    1644           0 : private:
    1645           0 :   friend AnalysisInfoMixin<TargetIRAnalysis>;
    1646             :   static AnalysisKey Key;
    1647           0 : 
    1648           0 :   /// The callback used to produce a result.
    1649             :   ///
    1650           2 :   /// We use a completely opaque callback so that targets can provide whatever
    1651           2 :   /// mechanism they desire for constructing the TTI for a given function.
    1652             :   ///
    1653           0 :   /// FIXME: Should we really use std::function? It's relatively inefficient.
    1654           0 :   /// It might be possible to arrange for even stateful callbacks to outlive
    1655             :   /// the analysis and thus use a function_ref which would be lighter weight.
    1656           2 :   /// This may also be less error-prone, as the callback is likely to reference
    1657           2 :   /// the external TargetMachine, and that reference must never dangle.
    1658             :   std::function<Result(const Function &)> TTICallback;
    1659        8305 : 
    1660             :   /// Helper function used as the callback in the default constructor.
    1661             :   static Result getDefaultTTI(const Function &F);
    1662             : };
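// [Editor's note] A minimal sketch of running this analysis under the new
// pass manager (assumes an llvm::Function F; real tools normally let
// PassBuilder::registerFunctionAnalyses perform the registration shown here):
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([] { return TargetIRAnalysis(); });
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);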
    1663        8305 : 
    1664             : /// Wrapper pass for TargetTransformInfo.
    1665         118 : ///
    1666             : /// This pass can be constructed from a TTI object which it stores internally
    1667             : /// and is queried by passes.
    1668             : class TargetTransformInfoWrapperPass : public ImmutablePass {
    1669         118 :   TargetIRAnalysis TIRA;
    1670             :   Optional<TargetTransformInfo> TTI;
    1671        8187 : 
    1672             :   virtual void anchor();
    1673             : 
    1674             : public:
    1675        8187 :   static char ID;
    1676             : 
    1677           7 :   /// We must provide a default constructor for the pass, but it should
    1678           7 :   /// never be used.
    1679             :   ///
    1680           0 :   /// Use the constructor below or call one of the creation routines.
    1681           0 :   TargetTransformInfoWrapperPass();
    1682             : 
    1683           7 :   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1684           7 : 
    1685             :   TargetTransformInfo &getTTI(const Function &F);
    1686           0 : };
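// [Editor's note] Under the legacy pass manager, a transform typically
// fetches the TTI for the function it is processing like this (a sketch;
// assumes the pass declared TargetTransformInfoWrapperPass as a required
// analysis in its getAnalysisUsage):
//
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);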
    1687             : 
    1688           0 : /// Create an analysis pass wrapper around a TTI object.
    1689           0 : ///
    1690             : /// This analysis pass just holds the TTI instance and makes it available to
    1691           0 : /// clients.
    1692           0 : ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
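// [Editor's note] A typical way to seed a legacy pipeline with the target's
// TTI, falling back to the conservative default when no TargetMachine is
// available (a sketch; assumes a legacy::PassManager PM and a
// TargetMachine *TM that may be null):
//
//   PM.add(createTargetTransformInfoWrapperPass(
//       TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()));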
    1693             : 
    1694           0 : } // End llvm namespace
    1695           0 : 
    1696             : #endif

Generated by: LCOV version 1.13