//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//
      21             : 
      22             : #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
      23             : #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
      24             : 
      25             : #include "llvm/ADT/Optional.h"
      26             : #include "llvm/IR/Operator.h"
      27             : #include "llvm/IR/PassManager.h"
      28             : #include "llvm/Pass.h"
      29             : #include "llvm/Support/AtomicOrdering.h"
      30             : #include "llvm/Support/DataTypes.h"
      31             : #include <functional>
      32             : 
      33             : namespace llvm {
      34             : 
      35             : namespace Intrinsic {
      36             : enum ID : unsigned;
      37             : }
      38             : 
      39             : class Function;
      40             : class GlobalValue;
      41             : class IntrinsicInst;
      42             : class LoadInst;
      43             : class Loop;
      44             : class SCEV;
      45             : class ScalarEvolution;
      46             : class StoreInst;
      47             : class SwitchInst;
      48             : class Type;
      49             : class User;
      50             : class Value;
      51             : 
/// \brief Information about a load/store intrinsic defined by the target.
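///
/// A minimal usage sketch (illustrative only; assumes a TargetTransformInfo
/// object \c TTI, an IntrinsicInst pointer \c II, and a hypothetical helper
/// \c processPointer):
/// \code
///   MemIntrinsicInfo Info;
///   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered())
///     processPointer(Info.PtrVal); // treat II like a load/store of PtrVal
/// \endcode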
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) && !IsVolatile;
  }
};

/// \brief This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// \brief Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// \brief Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// \brief Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// \brief The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of the instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// \brief Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
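  ///
  /// A minimal usage sketch (assuming a TargetTransformInfo object \c TTI and
  /// an instruction \c I that has a parent block):
  /// \code
  ///   int Cost =
  ///       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
  /// \endcode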
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }

  /// \brief Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
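  ///
  /// For example, a client might sum per-instruction costs across a block and
  /// compare the total against a small multiple of \c TCC_Basic (a sketch,
  /// assuming \c TTI and a basic block \c BB; the factor of 4 is arbitrary):
  /// \code
  ///   int Total = 0;
  ///   for (const Instruction &I : BB)
  ///     Total += TTI.getUserCost(&I);
  ///   bool IsCheap = Total <= 4 * TargetTransformInfo::TCC_Basic;
  /// \endcode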
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// \brief Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
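  ///
  /// A sketch of both forms (assuming \c TTI and an LLVMContext \c Ctx):
  /// \code
  ///   Type *I32 = Type::getInt32Ty(Ctx);
  ///   Type *I16 = Type::getInt16Ty(Ctx);
  ///   int MulCost = TTI.getOperationCost(Instruction::Mul, I32);
  ///   // Casts also require the operand type:
  ///   int TruncCost = TTI.getOperationCost(Instruction::Trunc, I16, I32);
  /// \endcode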
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// \brief Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// \brief Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// \brief Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize is set to the jump table size only when \p SI is suitable for
  /// a jump table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) const;

  /// \brief Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be a result of
  /// transformations of the current operands. The list must have the same
  /// number of entries as the IR user's current operands, and the entries
  /// must appear in the same order as those operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
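  ///
  /// A sketch (assuming \c TTI, a user \c U, and a replacement value
  /// \c Replacement for its first operand):
  /// \code
  ///   SmallVector<const Value *, 4> Ops(U->value_op_begin(),
  ///                                     U->value_op_end());
  ///   Ops[0] = Replacement;
  ///   int Cost = TTI.getUserCost(U, Ops);
  /// \endcode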
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// \brief This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }

  /// \brief Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// \brief Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// \brief Returns true for the target-specific set of operations which
  /// produce a uniform result even when given non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// \brief Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100):
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    /// Set the maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    /// Represents the number of instructions optimized away when a "back edge"
    /// becomes a "fall through" in the unrolled loop.
    /// For now we count a conditional branch on a backedge and a comparison
    /// feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations for loops with low dynamic tripcount.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
  };

  /// \brief Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
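  ///
  /// A usage sketch (assuming \c TTI, a loop \c L, and a ScalarEvolution
  /// \c SE; illustrative only):
  /// \code
  ///   TargetTransformInfo::UnrollingPreferences UP;
  ///   // ... fill UP with the target-independent defaults first ...
  ///   TTI.getUnrollingPreferences(L, SE, UP);
  ///   bool MayPartiallyUnroll = UP.Partial && UP.PartialThreshold > 0;
  /// \endcode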
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// \brief Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// \brief Return true if the specified immediate is a legal add immediate,
  /// that is, the target has add instructions which can add a register with
  /// the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// \brief Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// \brief Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
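  ///
  /// A sketch (assuming \c TTI and an LLVMContext \c Ctx): is
  /// "base-reg + 4 * index-reg + 16" legal for an i32 access?
  /// \code
  ///   bool Legal = TTI.isLegalAddressingMode(
  ///       Type::getInt32Ty(Ctx), /*BaseGV=*/nullptr, /*BaseOffset=*/16,
  ///       /*HasBaseReg=*/true, /*Scale=*/4);
  /// \endcode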
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;

  /// \brief Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// \brief Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// \brief Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// \brief Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// \brief Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// \brief Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  /// \brief Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// \brief Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// \brief Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// \brief Return true if switches should be turned into lookup tables for
  /// the target.
  bool shouldBuildLookupTables() const;

  /// \brief Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// \brief Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// \brief Enable inline expansion of memcmp.
  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const;

  /// \brief Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// \brief Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// \brief Determine if the target supports unaligned memory accesses.
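  ///
  /// A sketch (assuming \c TTI and an LLVMContext \c Ctx; illustrative only):
  /// \code
  ///   bool Fast = false;
  ///   bool Allowed = TTI.allowsMisalignedMemoryAccesses(
  ///       Ctx, /*BitWidth=*/64, /*AddressSpace=*/0, /*Alignment=*/1, &Fast);
  ///   bool EmitMisaligned = Allowed && Fast;
  /// \endcode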
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;

  /// \brief Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// \brief Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type.
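  ///
  /// A sketch from a constant-hoisting-style client (assuming \c TTI, an
  /// APInt \c Imm, its type \c Ty, and a hypothetical helper
  /// \c considerHoisting): hoist only immediates that are expensive to
  /// rematerialize at each use.
  /// \code
  ///   if (TTI.getIntImmCost(Imm, Ty) > TargetTransformInfo::TCC_Basic)
  ///     considerHoisting(Imm);
  /// \endcode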
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// \brief Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// \brief Return the expected cost for the given integer immediate when
  /// optimising for size. This is different from the other integer immediate
  /// cost functions in that it is subtarget agnostic. This is useful when you
  /// e.g. target one ISA such as AArch32 but smaller encodings could be
  /// possible with another such as Thumb. This return value is used as a
  /// penalty when the total cost for a constant is calculated (the bigger the
  /// cost, the more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// \brief The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Alternate,        ///< Choose alternate elements from vector.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
  };

  /// \brief Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// \brief Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D,   // The L1 data cache
    L2D,   // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How much before a load we should place the prefetch instruction.
  /// This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride.  This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead.  If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
  /// \p Args is an optional argument which holds the instruction operands'
  /// values so the TTI can analyze those values searching for special
  /// cases/optimizations based on those values.
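  ///
  /// A sketch (assuming \c TTI and a vector type \c VecTy): cost of a vector
  /// udiv whose second operand is a uniform power-of-2 constant.
  /// \code
  ///   int Cost = TTI.getArithmeticInstrCost(
  ///       Instruction::UDiv, VecTy, TargetTransformInfo::OK_AnyValue,
  ///       TargetTransformInfo::OK_UniformConstantValue,
  ///       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);
  /// \endcode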
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds.
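  ///
  /// A sketch (assuming \c TTI, a vector type \c VecTy, and a subvector type
  /// \c SubTy; the index 4 is illustrative):
  /// \code
  ///   int RevCost = TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VecTy);
  ///   int ExtCost = TTI.getShuffleCost(
  ///       TargetTransformInfo::SK_ExtractSubvector, VecTy, /*Index=*/4, SubTy);
  /// \endcode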
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                 Type *CondTy = nullptr, const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode is the kind of memory access (Load or Store).
  /// \p DataTy is a vector type of the data to be loaded or stored.
  /// \p Ptr is the pointer (or vector of pointers), i.e. the address(es) in
  ///    memory.
  /// \p VariableMask is true when the memory access is predicated with a mask
  ///    that is not a compile-time constant.
  /// \p Alignment is the alignment of a single element.
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code.
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor.
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps).
  /// \p Alignment is the alignment of the memory operation.
  /// \p AddressSpace is the address space of the pointer.
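  ///
  /// A sketch (assuming \c TTI and a wide vector type \c WideVecTy, e.g. the
  /// <8 x i32> type backing a factor-2 even/odd deinterleave):
  /// \code
  ///   unsigned Indices[] = {0, 1}; // load both members of the group
  ///   int Cost = TTI.getInterleavedMemoryOpCost(
  ///       Instruction::Load, WideVecTy, /*Factor=*/2, Indices,
  ///       /*Alignment=*/4, /*AddressSpace=*/0);
  /// \endcode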
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) const;

  /// \brief Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
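  ///
  /// A sketch (assuming \c TTI and a vector type \c VecTy): cost of a
  /// split-form add reduction.
  /// \code
  ///   int Cost = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
  ///                                             /*IsPairwiseForm=*/false);
  /// \endcode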
     771             :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
     772             :                                  bool IsPairwiseForm) const;
     773             :   int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
     774             :                              bool IsUnsigned) const;
     775             : 
     776             :   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
     777             :   /// Three cases are handled: 1. scalar instruction 2. vector instruction
     778             :   /// 3. scalar instruction which is to be vectorized with VF.
     779             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     780             :                             ArrayRef<Value *> Args, FastMathFlags FMF,
     781             :                             unsigned VF = 1) const;
     782             : 
     783             :   /// \returns The cost of Intrinsic instructions. Types analysis only.
     784             :   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
     785             :   /// arguments and the return value will be computed based on types.
     786             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     787             :                             ArrayRef<Type *> Tys, FastMathFlags FMF,
     788             :                             unsigned ScalarizationCostPassed = UINT_MAX) const;
     789             : 
     790             :   /// \returns The cost of Call instructions.
     791             :   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
     792             : 
     793             :   /// \returns The number of pieces into which the provided type must be
     794             :   /// split during legalization. Zero is returned when the answer is unknown.
     795             :   unsigned getNumberOfParts(Type *Tp) const;
     796             : 
     797             :   /// \returns The cost of the address computation. For most targets this can be
     798             :   /// merged into the instruction indexing mode. Some targets might want to
     799             :   /// distinguish between address computation for memory operations on vector
     800             :   /// types and scalar types. Such targets should override this function.
     801             :   /// The 'SE' parameter holds pointer for the scalar evolution object which
     802             :   /// is used in order to get the Ptr step value in case of constant stride.
     803             :   /// The 'Ptr' parameter holds SCEV of the access pointer.
     804             :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
     805             :                                 const SCEV *Ptr = nullptr) const;
     806             : 
     807             :   /// \returns The cost, if any, of keeping values of the given types alive
     808             :   /// over a callsite.
     809             :   ///
     810             :   /// Some types may require the use of register classes that do not have
     811             :   /// any callee-saved registers, so would require a spill and fill.
     812             :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
     813             : 
     814             :   /// \returns True if the intrinsic is a supported memory intrinsic. \p Info
     815             :   /// will contain additional information: whether the intrinsic may read from
     816             :   /// or write to memory, its volatility, and the pointer. \p Info is
     817             :   /// undefined if false is returned.
     818             :   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
     819             : 
     820             :   /// \returns The maximum element size, in bytes, for an element
     821             :   /// unordered-atomic memory intrinsic.
     822             :   unsigned getAtomicMemIntrinsicMaxElementSize() const;
     823             : 
     824             :   /// \returns A value which is the result of the given memory intrinsic.  New
     825             :   /// instructions may be created to extract the result from the given intrinsic
     826             :   /// memory operation.  Returns nullptr if the target cannot create a result
     827             :   /// from the given intrinsic.
     828             :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
     829             :                                            Type *ExpectedType) const;
     830             : 
     831             :   /// \returns The type to use in a loop expansion of a memcpy call.
     832             :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
     833             :                                   unsigned SrcAlign, unsigned DestAlign) const;
     834             : 
     835             :   /// \param[out] OpsOut The operand types used to copy \p RemainingBytes of memory.
     836             :   /// \param RemainingBytes The number of bytes to copy.
     837             :   ///
     838             :   /// Calculates the operand types to use when copying \p RemainingBytes of
     839             :   /// memory, where source and destination alignments are \p SrcAlign and
     840             :   /// \p DestAlign respectively.
     841             :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
     842             :                                          LLVMContext &Context,
     843             :                                          unsigned RemainingBytes,
     844             :                                          unsigned SrcAlign,
     845             :                                          unsigned DestAlign) const;
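                     :   // Illustrative sketch: a loop expansion of memcpy could first ask for the
                     :   // widest copy type and then for the residual operand types; 'Ctx', 'Len',
                     :   // 'SrcAlign', and 'DstAlign' are assumed to be in scope:
                     :   //   Type *CopyTy =
                     :   //       TTI.getMemcpyLoopLoweringType(Ctx, Len, SrcAlign, DstAlign);
                     :   //   SmallVector<Type *, 4> Residual;
                     :   //   TTI.getMemcpyLoopResidualLoweringType(
                     :   //       Residual, Ctx, /*RemainingBytes=*/7, SrcAlign, DstAlign);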
     846             : 
     847             :   /// \returns True if we want to test the new memcpy lowering functionality in
     848             :   /// Transforms/Utils.
     849             :   /// Temporary. Will be removed once we move to the new functionality and
     850             :   /// remove the old.
     851             :   bool useWideIRMemcpyLoopLowering() const;
     852             : 
     853             :   /// \returns True if the two functions have compatible attributes for inlining
     854             :   /// purposes.
     855             :   bool areInlineCompatible(const Function *Caller,
     856             :                            const Function *Callee) const;
     857             : 
     858             :   /// \returns The bitwidth of the largest vector type that should be used to
     859             :   /// load/store in the given address space.
     860             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     861             : 
     862             :   /// \returns True if the load instruction is legal to vectorize.
     863             :   bool isLegalToVectorizeLoad(LoadInst *LI) const;
     864             : 
     865             :   /// \returns True if the store instruction is legal to vectorize.
     866             :   bool isLegalToVectorizeStore(StoreInst *SI) const;
     867             : 
     868             :   /// \returns True if it is legal to vectorize the given load chain.
     869             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     870             :                                    unsigned Alignment,
     871             :                                    unsigned AddrSpace) const;
     872             : 
     873             :   /// \returns True if it is legal to vectorize the given store chain.
     874             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     875             :                                     unsigned Alignment,
     876             :                                     unsigned AddrSpace) const;
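                     :   // Illustrative sketch: a load/store vectorizer might gate a candidate
                     :   // chain on these hooks; 'LI' (LoadInst*) and the chain parameters are
                     :   // assumptions, and vectorizeChain is a hypothetical helper:
                     :   //   if (TTI.isLegalToVectorizeLoad(LI) &&
                     :   //       TTI.isLegalToVectorizeLoadChain(/*ChainSizeInBytes=*/16,
                     :   //                                       /*Alignment=*/4, /*AddrSpace=*/0))
                     :   //     vectorizeChain(Chain);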
     877             : 
     878             :   /// \returns The new vector factor value if the target doesn't support \p
     879             :   /// ChainSizeInBytes loads or has a better vector factor.
     880             :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
     881             :                                unsigned ChainSizeInBytes,
     882             :                                VectorType *VecTy) const;
     883             : 
     884             :   /// \returns The new vector factor value if the target doesn't support \p
     885             :   /// ChainSizeInBytes stores or has a better vector factor.
     886             :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
     887             :                                 unsigned ChainSizeInBytes,
     888             :                                 VectorType *VecTy) const;
     889             : 
     890             :   /// Flags describing the kind of vector reduction.
     891             :   struct ReductionFlags {
     892         263 :     ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
     893             :     bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
     894             :     bool IsSigned; ///< Whether the operation is a signed int reduction.
     895             :     bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
     896             :   };
     897             : 
     898             :   /// \returns True if the target wants to handle the given reduction idiom in
     899             :   /// the intrinsics form instead of the shuffle form.
     900             :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
     901             :                              ReductionFlags Flags) const;
     902             : 
     903             :   /// \returns True if the target wants to expand the given reduction intrinsic
     904             :   /// into a shuffle sequence.
     905             :   bool shouldExpandReduction(const IntrinsicInst *II) const;
     906             :   /// @}
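                     :   // Illustrative sketch: asking the target whether a signed integer max
                     :   // reduction should be emitted in the intrinsic form; 'VecTy' is assumed:
                     :   //   ReductionFlags Flags;
                     :   //   Flags.IsMaxOp = true;
                     :   //   Flags.IsSigned = true;
                     :   //   if (TTI.useReductionIntrinsic(Instruction::ICmp, VecTy, Flags))
                     :   //     /* emit the llvm.experimental.vector.reduce.smax form */;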
     907             : 
     908             : private:
     909             :   /// \brief Estimate the latency of the specified instruction.
     910             :   /// Returns 1 as the default value.
     911             :   int getInstructionLatency(const Instruction *I) const;
     912             : 
     913             :   /// \brief Returns the expected throughput cost of the instruction.
     914             :   /// Returns -1 if the cost is unknown.
     915             :   int getInstructionThroughput(const Instruction *I) const;
     916             : 
     917             :   /// \brief The abstract base class used to type erase specific TTI
     918             :   /// implementations.
     919             :   class Concept;
     920             : 
     921             :   /// \brief The template model for the base class which wraps a concrete
     922             :   /// implementation in a type erased interface.
     923             :   template <typename T> class Model;
     924             : 
     925             :   std::unique_ptr<Concept> TTIImpl;
     926             : };
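                     : // The Concept/Model pair below is the classic type-erasure idiom. A minimal
                     : // sketch of the pattern (illustrative, with invented names):
                     : //   struct Concept { virtual ~Concept() {} virtual int cost() = 0; };
                     : //   template <typename T> struct Model final : Concept {
                     : //     T Impl;
                     : //     Model(T Impl) : Impl(std::move(Impl)) {}
                     : //     int cost() override { return Impl.cost(); } // forward to the impl
                     : //   };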
     927             : 
     928             : class TargetTransformInfo::Concept {
     929             : public:
     930             :   virtual ~Concept() = 0;
     931             :   virtual const DataLayout &getDataLayout() const = 0;
     932             :   virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
     933             :   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
     934             :                          ArrayRef<const Value *> Operands) = 0;
     935             :   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
     936             :   virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
     937             :   virtual int getCallCost(const Function *F, int NumArgs) = 0;
     938             :   virtual int getCallCost(const Function *F,
     939             :                           ArrayRef<const Value *> Arguments) = 0;
     940             :   virtual unsigned getInliningThresholdMultiplier() = 0;
     941             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     942             :                                ArrayRef<Type *> ParamTys) = 0;
     943             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     944             :                                ArrayRef<const Value *> Arguments) = 0;
     945             :   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
     946             :                                                     unsigned &JTSize) = 0;
     947             :   virtual int
     948             :   getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
     949             :   virtual bool hasBranchDivergence() = 0;
     950             :   virtual bool isSourceOfDivergence(const Value *V) = 0;
     951             :   virtual bool isAlwaysUniform(const Value *V) = 0;
     952             :   virtual unsigned getFlatAddressSpace() = 0;
     953             :   virtual bool isLoweredToCall(const Function *F) = 0;
     954             :   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
     955             :                                        UnrollingPreferences &UP) = 0;
     956             :   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
     957             :   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
     958             :   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
     959             :                                      int64_t BaseOffset, bool HasBaseReg,
     960             :                                      int64_t Scale,
     961             :                                      unsigned AddrSpace,
     962             :                                      Instruction *I) = 0;
     963             :   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
     964             :                              TargetTransformInfo::LSRCost &C2) = 0;
     965             :   virtual bool isLegalMaskedStore(Type *DataType) = 0;
     966             :   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
     967             :   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
     968             :   virtual bool isLegalMaskedGather(Type *DataType) = 0;
     969             :   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
     970             :   virtual bool prefersVectorizedAddressing() = 0;
     971             :   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
     972             :                                    int64_t BaseOffset, bool HasBaseReg,
     973             :                                    int64_t Scale, unsigned AddrSpace) = 0;
     974             :   virtual bool LSRWithInstrQueries() = 0;
     975             :   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
     976             :   virtual bool isProfitableToHoist(Instruction *I) = 0;
     977             :   virtual bool isTypeLegal(Type *Ty) = 0;
     978             :   virtual unsigned getJumpBufAlignment() = 0;
     979             :   virtual unsigned getJumpBufSize() = 0;
     980             :   virtual bool shouldBuildLookupTables() = 0;
     981             :   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
     982             :   virtual unsigned
     983             :   getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
     984             :   virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
     985             :                                                     unsigned VF) = 0;
     986             :   virtual bool supportsEfficientVectorElementLoadStore() = 0;
     987             :   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
     988             :   virtual bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) = 0;
     989             :   virtual bool enableInterleavedAccessVectorization() = 0;
     990             :   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
     991             :   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
     992             :                                               unsigned BitWidth,
     993             :                                               unsigned AddressSpace,
     994             :                                               unsigned Alignment,
     995             :                                               bool *Fast) = 0;
     996             :   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
     997             :   virtual bool haveFastSqrt(Type *Ty) = 0;
     998             :   virtual int getFPOpCost(Type *Ty) = 0;
     999             :   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1000             :                                     Type *Ty) = 0;
    1001             :   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
    1002             :   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1003             :                             Type *Ty) = 0;
    1004             :   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1005             :                             Type *Ty) = 0;
    1006             :   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
    1007             :   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
    1008             :   virtual unsigned getMinVectorRegisterBitWidth() = 0;
    1009             :   virtual bool shouldConsiderAddressTypePromotion(
    1010             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
    1011             :   virtual unsigned getCacheLineSize() = 0;
    1012             :   virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
    1013             :   virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
    1014             :   virtual unsigned getPrefetchDistance() = 0;
    1015             :   virtual unsigned getMinPrefetchStride() = 0;
    1016             :   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
    1017             :   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
    1018             :   virtual unsigned
    1019             :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1020             :                          OperandValueKind Opd2Info,
    1021             :                          OperandValueProperties Opd1PropInfo,
    1022             :                          OperandValueProperties Opd2PropInfo,
    1023             :                          ArrayRef<const Value *> Args) = 0;
    1024             :   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1025             :                              Type *SubTp) = 0;
    1026             :   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1027             :                                const Instruction *I) = 0;
    1028             :   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
    1029             :                                        VectorType *VecTy, unsigned Index) = 0;
    1030             :   virtual int getCFInstrCost(unsigned Opcode) = 0;
    1031             :   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    1032             :                                 Type *CondTy, const Instruction *I) = 0;
    1033             :   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
    1034             :                                  unsigned Index) = 0;
    1035             :   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1036             :                               unsigned AddressSpace, const Instruction *I) = 0;
    1037             :   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    1038             :                                     unsigned Alignment,
    1039             :                                     unsigned AddressSpace) = 0;
    1040             :   virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1041             :                                      Value *Ptr, bool VariableMask,
    1042             :                                      unsigned Alignment) = 0;
    1043             :   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
    1044             :                                          unsigned Factor,
    1045             :                                          ArrayRef<unsigned> Indices,
    1046             :                                          unsigned Alignment,
    1047             :                                          unsigned AddressSpace) = 0;
    1048             :   virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1049             :                                          bool IsPairwiseForm) = 0;
    1050             :   virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1051             :                                      bool IsPairwiseForm, bool IsUnsigned) = 0;
    1052             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1053             :                       ArrayRef<Type *> Tys, FastMathFlags FMF,
    1054             :                       unsigned ScalarizationCostPassed) = 0;
    1055             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1056             :          ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
    1057             :   virtual int getCallInstrCost(Function *F, Type *RetTy,
    1058             :                                ArrayRef<Type *> Tys) = 0;
    1059             :   virtual unsigned getNumberOfParts(Type *Tp) = 0;
    1060             :   virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1061             :                                         const SCEV *Ptr) = 0;
    1062             :   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
    1063             :   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1064             :                                   MemIntrinsicInfo &Info) = 0;
    1065             :   virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
    1066             :   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1067             :                                                    Type *ExpectedType) = 0;
    1068             :   virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1069             :                                           unsigned SrcAlign,
    1070             :                                           unsigned DestAlign) const = 0;
    1071             :   virtual void getMemcpyLoopResidualLoweringType(
    1072             :       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    1073             :       unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
    1074             :   virtual bool areInlineCompatible(const Function *Caller,
    1075             :                                    const Function *Callee) const = 0;
    1076             :   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
    1077             :   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
    1078             :   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
    1079             :   virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1080             :                                            unsigned Alignment,
    1081             :                                            unsigned AddrSpace) const = 0;
    1082             :   virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1083             :                                             unsigned Alignment,
    1084             :                                             unsigned AddrSpace) const = 0;
    1085             :   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1086             :                                        unsigned ChainSizeInBytes,
    1087             :                                        VectorType *VecTy) const = 0;
    1088             :   virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1089             :                                         unsigned ChainSizeInBytes,
    1090             :                                         VectorType *VecTy) const = 0;
    1091             :   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1092             :                                      ReductionFlags) const = 0;
    1093             :   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
    1094             :   virtual int getInstructionLatency(const Instruction *I) = 0;
    1095             : };
    1096             : 
    1097             : template <typename T>
    1098             : class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
    1099             :   T Impl;
    1100             : 
    1101             : public:
    1102     3435852 :   Model(T Impl) : Impl(std::move(Impl)) {}
    1103     1717834 :   ~Model() override {}
    1104             : 
    1105           0 :   const DataLayout &getDataLayout() const override {
    1106           0 :     return Impl.getDataLayout();
    1107             :   }
    1108             : 
    1109           0 :   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    1110           0 :     return Impl.getOperationCost(Opcode, Ty, OpTy);
    1111             :   }
    1112       29491 :   int getGEPCost(Type *PointeeType, const Value *Ptr,
    1113             :                  ArrayRef<const Value *> Operands) override {
    1114       58944 :     return Impl.getGEPCost(PointeeType, Ptr, Operands);
    1115             :   }
    1116           0 :   int getExtCost(const Instruction *I, const Value *Src) override {
    1117           0 :     return Impl.getExtCost(I, Src);
    1118             :   }
    1119           0 :   int getCallCost(FunctionType *FTy, int NumArgs) override {
    1120           0 :     return Impl.getCallCost(FTy, NumArgs);
    1121             :   }
    1122           0 :   int getCallCost(const Function *F, int NumArgs) override {
    1123           0 :     return Impl.getCallCost(F, NumArgs);
    1124             :   }
    1125           0 :   int getCallCost(const Function *F,
    1126             :                   ArrayRef<const Value *> Arguments) override {
    1127           0 :     return Impl.getCallCost(F, Arguments);
    1128             :   }
    1129      226843 :   unsigned getInliningThresholdMultiplier() override {
    1130      226843 :     return Impl.getInliningThresholdMultiplier();
    1131             :   }
    1132           0 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1133             :                        ArrayRef<Type *> ParamTys) override {
    1134           0 :     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
    1135             :   }
    1136           4 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1137             :                        ArrayRef<const Value *> Arguments) override {
    1138           8 :     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
    1139             :   }
    1140     2585189 :   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    1141     2585189 :     return Impl.getUserCost(U, Operands);
    1142             :   }
    1143       93276 :   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
    1144      510284 :   bool isSourceOfDivergence(const Value *V) override {
    1145      510284 :     return Impl.isSourceOfDivergence(V);
    1146             :   }
    1147             : 
    1148      141473 :   bool isAlwaysUniform(const Value *V) override {
    1149      141473 :     return Impl.isAlwaysUniform(V);
    1150             :   }
    1151             : 
    1152       18270 :   unsigned getFlatAddressSpace() override {
    1153       35561 :     return Impl.getFlatAddressSpace();
    1154             :   }
    1155             : 
    1156      901157 :   bool isLoweredToCall(const Function *F) override {
    1157      901157 :     return Impl.isLoweredToCall(F);
    1158             :   }
    1159        5766 :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
    1160             :                                UnrollingPreferences &UP) override {
    1161        5766 :     return Impl.getUnrollingPreferences(L, SE, UP);
    1162             :   }
    1163       15344 :   bool isLegalAddImmediate(int64_t Imm) override {
    1164       30321 :     return Impl.isLegalAddImmediate(Imm);
    1165             :   }
    1166       20376 :   bool isLegalICmpImmediate(int64_t Imm) override {
    1167       40667 :     return Impl.isLegalICmpImmediate(Imm);
    1168             :   }
    1169      295128 :   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1170             :                              bool HasBaseReg, int64_t Scale,
    1171             :                              unsigned AddrSpace,
    1172             :                              Instruction *I) override {
    1173             :     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
    1174      590256 :                                       Scale, AddrSpace, I);
    1175             :   }
    1176       57630 :   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
    1177             :                      TargetTransformInfo::LSRCost &C2) override {
    1178       69060 :     return Impl.isLSRCostLess(C1, C2);
    1179             :   }
    1180         139 :   bool isLegalMaskedStore(Type *DataType) override {
    1181         139 :     return Impl.isLegalMaskedStore(DataType);
    1182             :   }
    1183         243 :   bool isLegalMaskedLoad(Type *DataType) override {
    1184         243 :     return Impl.isLegalMaskedLoad(DataType);
    1185             :   }
    1186         239 :   bool isLegalMaskedScatter(Type *DataType) override {
    1187         239 :     return Impl.isLegalMaskedScatter(DataType);
    1188             :   }
    1189         595 :   bool isLegalMaskedGather(Type *DataType) override {
    1190         595 :     return Impl.isLegalMaskedGather(DataType);
    1191             :   }
    1192          23 :   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    1193          23 :     return Impl.hasDivRemOp(DataType, IsSigned);
    1194             :   }
    1195         966 :   bool prefersVectorizedAddressing() override {
    1196         966 :     return Impl.prefersVectorizedAddressing();
    1197             :   }
    1198       64812 :   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1199             :                            bool HasBaseReg, int64_t Scale,
    1200             :                            unsigned AddrSpace) override {
    1201             :     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
    1202      129624 :                                      Scale, AddrSpace);
    1203             :   }
    1204       37755 :   bool LSRWithInstrQueries() override {
    1205       37755 :     return Impl.LSRWithInstrQueries();
    1206             :   }
    1207        9673 :   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    1208       18655 :     return Impl.isTruncateFree(Ty1, Ty2);
    1209             :   }
    1210        3443 :   bool isProfitableToHoist(Instruction *I) override {
    1211        6820 :     return Impl.isProfitableToHoist(I);
    1212             :   }
    1213         612 :   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
    1214           0 :   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
    1215           0 :   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
    1216         341 :   bool shouldBuildLookupTables() override {
    1217         662 :     return Impl.shouldBuildLookupTables();
    1218             :   }
    1219        1735 :   bool shouldBuildLookupTablesForConstant(Constant *C) override {
    1220        1735 :     return Impl.shouldBuildLookupTablesForConstant(C);
    1221             :   }
    1222         648 :   unsigned getScalarizationOverhead(Type *Ty, bool Insert,
    1223             :                                     bool Extract) override {
    1224         648 :     return Impl.getScalarizationOverhead(Ty, Insert, Extract);
    1225             :   }
    1226         691 :   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1227             :                                             unsigned VF) override {
    1228         691 :     return Impl.getOperandsScalarizationOverhead(Args, VF);
    1229             :   }
    1230             : 
    1231         333 :   bool supportsEfficientVectorElementLoadStore() override {
    1232         333 :     return Impl.supportsEfficientVectorElementLoadStore();
    1233             :   }
    1234             : 
    1235          61 :   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    1236          61 :     return Impl.enableAggressiveInterleaving(LoopHasReductions);
    1237             :   }
    1238         470 :   bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) override {
    1239         470 :     return Impl.expandMemCmp(I, MaxLoadSize);
    1240             :   }
    1241         755 :   bool enableInterleavedAccessVectorization() override {
    1242         755 :     return Impl.enableInterleavedAccessVectorization();
    1243             :   }
    1244         124 :   bool isFPVectorizationPotentiallyUnsafe() override {
    1245         124 :     return Impl.isFPVectorizationPotentiallyUnsafe();
    1246             :   }
    1247         772 :   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1248             :                                       unsigned BitWidth, unsigned AddressSpace,
    1249             :                                       unsigned Alignment, bool *Fast) override {
    1250             :     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
    1251         772 :                                                Alignment, Fast);
    1252             :   }
    1253        2163 :   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    1254        2163 :     return Impl.getPopcntSupport(IntTyWidthInBit);
    1255             :   }
    1256          49 :   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
    1257             : 
    1258        1026 :   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
    1259             : 
    1260          90 :   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1261             :                             Type *Ty) override {
    1262          90 :     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
    1263             :   }
    1264          14 :   int getIntImmCost(const APInt &Imm, Type *Ty) override {
    1265          14 :     return Impl.getIntImmCost(Imm, Ty);
    1266             :   }
    1267      509888 :   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1268             :                     Type *Ty) override {
    1269      509953 :     return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
    1270             :   }
    1271      153320 :   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1272             :                     Type *Ty) override {
    1273      153324 :     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
    1274             :   }
    1275      129341 :   unsigned getNumberOfRegisters(bool Vector) override {
    1276      138585 :     return Impl.getNumberOfRegisters(Vector);
    1277             :   }
    1278        9814 :   unsigned getRegisterBitWidth(bool Vector) const override {
    1279        9966 :     return Impl.getRegisterBitWidth(Vector);
    1280             :   }
    1281        8421 :   unsigned getMinVectorRegisterBitWidth() override {
    1282        8478 :     return Impl.getMinVectorRegisterBitWidth();
    1283             :   }
    1284       25357 :   bool shouldConsiderAddressTypePromotion(
    1285             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    1286             :     return Impl.shouldConsiderAddressTypePromotion(
    1287       48568 :         I, AllowPromotionWithoutCommonHeader);
    1288             :   }
    1289          15 :   unsigned getCacheLineSize() override {
    1290          15 :     return Impl.getCacheLineSize();
    1291             :   }
    1292           8 :   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
    1293          12 :     return Impl.getCacheSize(Level);
    1294             :   }
    1295          10 :   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
    1296          12 :     return Impl.getCacheAssociativity(Level);
    1297             :   }
    1298       17909 :   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
    1299          68 :   unsigned getMinPrefetchStride() override {
    1300          68 :     return Impl.getMinPrefetchStride();
    1301             :   }
    1302         217 :   unsigned getMaxPrefetchIterationsAhead() override {
    1303         217 :     return Impl.getMaxPrefetchIterationsAhead();
    1304             :   }
    1305        1808 :   unsigned getMaxInterleaveFactor(unsigned VF) override {
    1306        1838 :     return Impl.getMaxInterleaveFactor(VF);
    1307             :   }
    1308        1236 :   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
    1309             :                                             unsigned &JTSize) override {
    1310        1238 :     return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
    1311             :   }
    1312             :   unsigned
    1313      148135 :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1314             :                          OperandValueKind Opd2Info,
    1315             :                          OperandValueProperties Opd1PropInfo,
    1316             :                          OperandValueProperties Opd2PropInfo,
    1317             :                          ArrayRef<const Value *> Args) override {
    1318             :     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
    1319      148135 :                                        Opd1PropInfo, Opd2PropInfo, Args);
    1320             :   }
    1321        2271 :   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1322             :                      Type *SubTp) override {
    1323        2290 :     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
    1324             :   }
    1325        4819 :   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1326             :                        const Instruction *I) override {
    1327        4819 :     return Impl.getCastInstrCost(Opcode, Dst, Src, I);
    1328             :   }
    1329          20 :   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
    1330             :                                unsigned Index) override {
    1331          20 :     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
    1332             :   }
    1333        8292 :   int getCFInstrCost(unsigned Opcode) override {
    1334        8292 :     return Impl.getCFInstrCost(Opcode);
    1335             :   }
    1336        4222 :   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
    1337             :                          const Instruction *I) override {
    1338        4222 :     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
    1339             :   }
    1340       30296 :   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    1341       30296 :     return Impl.getVectorInstrCost(Opcode, Val, Index);
    1342             :   }
    1343      286641 :   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1344             :                       unsigned AddressSpace, const Instruction *I) override {
    1345      286641 :     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
    1346             :   }
    1347         125 :   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1348             :                             unsigned AddressSpace) override {
    1349         125 :     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
    1350             :   }
    1351         103 :   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1352             :                              Value *Ptr, bool VariableMask,
    1353             :                              unsigned Alignment) override {
    1354             :     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
    1355         103 :                                        Alignment);
    1356             :   }
    1357          66 :   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
    1358             :                                  ArrayRef<unsigned> Indices, unsigned Alignment,
    1359             :                                  unsigned AddressSpace) override {
    1360             :     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
    1361          66 :                                            Alignment, AddressSpace);
    1362             :   }
    1363         224 :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1364             :                                  bool IsPairwiseForm) override {
    1365         224 :     return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
    1366             :   }
    1367        1080 :   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1368             :                              bool IsPairwiseForm, bool IsUnsigned) override {
    1369        1080 :     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
    1370             :   }
    1371         955 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
    1372             :                FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    1373             :     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
    1374         955 :                                       ScalarizationCostPassed);
    1375             :   }
    1376        2419 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1377             :        ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    1378        2419 :     return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
    1379             :   }
    1380         490 :   int getCallInstrCost(Function *F, Type *RetTy,
    1381             :                        ArrayRef<Type *> Tys) override {
    1382         490 :     return Impl.getCallInstrCost(F, RetTy, Tys);
    1383             :   }
    1384       12790 :   unsigned getNumberOfParts(Type *Tp) override {
    1385       23753 :     return Impl.getNumberOfParts(Tp);
    1386             :   }
    1387        1087 :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1388             :                                 const SCEV *Ptr) override {
    1389        1087 :     return Impl.getAddressComputationCost(Ty, SE, Ptr);
    1390             :   }
    1391       17749 :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    1392       17749 :     return Impl.getCostOfKeepingLiveOverCall(Tys);
    1393             :   }
    1394      180407 :   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1395             :                           MemIntrinsicInfo &Info) override {
    1396      180407 :     return Impl.getTgtMemIntrinsic(Inst, Info);
    1397             :   }
    1398           9 :   unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    1399           9 :     return Impl.getAtomicMemIntrinsicMaxElementSize();
    1400             :   }
    1401          20 :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1402             :                                            Type *ExpectedType) override {
    1403          20 :     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
    1404             :   }
    1405          11 :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1406             :                                   unsigned SrcAlign,
    1407             :                                   unsigned DestAlign) const override {
    1408          22 :     return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
    1409             :   }
    1410           0 :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
    1411             :                                          LLVMContext &Context,
    1412             :                                          unsigned RemainingBytes,
    1413             :                                          unsigned SrcAlign,
    1414             :                                          unsigned DestAlign) const override {
    1415           0 :     Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
    1416             :                                            SrcAlign, DestAlign);
    1417           0 :   }
    1418      353239 :   bool areInlineCompatible(const Function *Caller,
    1419             :                            const Function *Callee) const override {
    1420      353239 :     return Impl.areInlineCompatible(Caller, Callee);
    1421             :   }
    1422       25170 :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    1423       25170 :     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
    1424             :   }
    1425        9915 :   bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    1426        9915 :     return Impl.isLegalToVectorizeLoad(LI);
    1427             :   }
    1428       14398 :   bool isLegalToVectorizeStore(StoreInst *SI) const override {
    1429       14398 :     return Impl.isLegalToVectorizeStore(SI);
    1430             :   }
    1431         745 :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1432             :                                    unsigned Alignment,
    1433             :                                    unsigned AddrSpace) const override {
    1434             :     return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
    1435         745 :                                             AddrSpace);
    1436             :   }
    1437         370 :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1438             :                                     unsigned Alignment,
    1439             :                                     unsigned AddrSpace) const override {
    1440             :     return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
    1441         370 :                                              AddrSpace);
    1442             :   }
    1443         741 :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1444             :                                unsigned ChainSizeInBytes,
    1445             :                                VectorType *VecTy) const override {
    1446         741 :     return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
    1447             :   }
    1448         322 :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1449             :                                 unsigned ChainSizeInBytes,
    1450             :                                 VectorType *VecTy) const override {
    1451         322 :     return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
    1452             :   }
    1453         263 :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1454             :                              ReductionFlags Flags) const override {
    1455         263 :     return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
    1456             :   }
    1457          44 :   bool shouldExpandReduction(const IntrinsicInst *II) const override {
    1458          44 :     return Impl.shouldExpandReduction(II);
    1459             :   }
    1460           3 :   int getInstructionLatency(const Instruction *I) override {
    1461           6 :     return Impl.getInstructionLatency(I);
    1462             :   }
    1463             : };
    1464             : 
    1465             : template <typename T>
    1466      414849 : TargetTransformInfo::TargetTransformInfo(T Impl)
    1467     6823392 :     : TTIImpl(new Model<T>(Impl)) {}
    1468             : 
    1469             : /// \brief Analysis pass providing the \c TargetTransformInfo.
    1470             : ///
    1471             : /// The core idea of the TargetIRAnalysis is to expose an interface through
    1472             : /// which LLVM targets can analyze and provide information about the middle
    1473             : /// end's target-independent IR. This supports use cases such as target-aware
    1474             : /// cost modeling of IR constructs.
    1475             : ///
    1476             : /// This is a function analysis because much of the cost modeling for targets
    1477             : /// is done in a subtarget specific way and LLVM supports compiling different
    1478             : /// functions targeting different subtargets in order to support runtime
    1479             : /// dispatch according to the observed subtarget.
    1480      255394 : class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
    1481             : public:
    1482             :   typedef TargetTransformInfo Result;
    1483             : 
    1484             :   /// \brief Default construct a target IR analysis.
    1485             :   ///
    1486             :   /// This will use the module's datalayout to construct a baseline
    1487             :   /// conservative TTI result.
    1488             :   TargetIRAnalysis();
    1489             : 
    1490             :   /// \brief Construct an IR analysis pass around a target-provided callback.
    1491             :   ///
    1492             :   /// The callback will be called with a particular function for which the TTI
    1493             :   /// is needed and must return a TTI object for that function.
    1494             :   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
    1495             : 
    1496             :   // Value semantics. We spell out the constructors for MSVC.
    1497             :   TargetIRAnalysis(const TargetIRAnalysis &Arg)
    1498             :       : TTICallback(Arg.TTICallback) {}
    1499             :   TargetIRAnalysis(TargetIRAnalysis &&Arg)
    1500      169790 :       : TTICallback(std::move(Arg.TTICallback)) {}
    1501             :   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    1502             :     TTICallback = RHS.TTICallback;
    1503             :     return *this;
    1504             :   }
    1505             :   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    1506             :     TTICallback = std::move(RHS.TTICallback);
    1507             :     return *this;
    1508             :   }
    1509             : 
    1510             :   Result run(const Function &F, FunctionAnalysisManager &);
    1511             : 
    1512             : private:
    1513             :   friend AnalysisInfoMixin<TargetIRAnalysis>;
    1514             :   static AnalysisKey Key;
    1515             : 
    1516             :   /// \brief The callback used to produce a result.
    1517             :   ///
    1518             :   /// We use a completely opaque callback so that targets can provide whatever
    1519             :   /// mechanism they desire for constructing the TTI for a given function.
    1520             :   ///
    1521             :   /// FIXME: Should we really use std::function? It's relatively inefficient.
    1522             :   /// It might be possible to arrange for even stateful callbacks to outlive
    1523             :   /// the analysis and thus use a function_ref which would be lighter weight.
    1524             :   /// This may also be less error prone as the callback is likely to reference
    1525             :   /// the external TargetMachine, and that reference needs to never dangle.
    1526             :   std::function<Result(const Function &)> TTICallback;
    1527             : 
    1528             :   /// \brief Helper function used as the callback in the default constructor.
    1529             :   static Result getDefaultTTI(const Function &F);
    1530             : };
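                     : // Illustrative sketch: under the new pass manager, a pass obtains the TTI
                     : // result through a FunctionAnalysisManager ('FAM' and 'F' are assumed):
                     : //   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);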
    1531             : 
    1532             : /// \brief Wrapper pass for TargetTransformInfo.
    1533             : ///
    1534             : /// This pass can be constructed from a TTI object which it stores internally
    1535             : /// and is queried by passes.
    1536      168112 : class TargetTransformInfoWrapperPass : public ImmutablePass {
    1537             :   TargetIRAnalysis TIRA;
    1538             :   Optional<TargetTransformInfo> TTI;
    1539             : 
    1540             :   virtual void anchor();
    1541             : 
    1542             : public:
    1543             :   static char ID;
    1544             : 
    1545             :   /// \brief We must provide a default constructor for the pass, but it should
    1546             :   /// never be used.
    1547             :   ///
    1548             :   /// Use the constructor below or call one of the creation routines.
    1549             :   TargetTransformInfoWrapperPass();
    1550             : 
    1551             :   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1552             : 
    1553             :   TargetTransformInfo &getTTI(const Function &F);
    1554             : };
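                     : // Illustrative sketch: under the legacy pass manager, a pass declares a
                     : // dependency in getAnalysisUsage and then queries the wrapper ('AU' and
                     : // 'F' are assumed):
                     : //   AU.addRequired<TargetTransformInfoWrapperPass>();
                     : //   TargetTransformInfo &TTI =
                     : //       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);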
    1555             : 
    1556             : /// \brief Create an analysis pass wrapper around a TTI object.
    1557             : ///
    1558             : /// This analysis pass just holds the TTI instance and makes it available to
    1559             : /// clients.
    1560             : ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1561             : 
    1562             : } // End llvm namespace
    1563             : 
    1564             : #endif

Generated by: LCOV version 1.13