//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
enum ID : unsigned;
}

class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class Loop;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class Type;
class User;
class Value;

/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) && !IsVolatile;
  }
};
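
// A minimal usage sketch (illustrative, not part of the original header):
// a pass can ask the target to describe a load/store-like intrinsic via
// TargetTransformInfo::getTgtMemIntrinsic (declared below) and then reason
// about it as an ordinary memory access. `TTI` and `II` are assumed to be a
// valid TargetTransformInfo and IntrinsicInst in scope; processAsMemoryAccess
// is a hypothetical client function.
//
// \code
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered())
//     // Safe to treat the intrinsic as a simple, unordered access through
//     // Info.PtrVal.
//     processAsMemoryAccess(II, Info.PtrVal);
// \endcode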

/// \brief This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// \brief Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// \brief Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// \brief Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// \brief The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of the instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// \brief Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }
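
  // A minimal usage sketch (illustrative, not part of the original header):
  // estimate the reciprocal-throughput cost of every instruction in a basic
  // block. `TTI` is assumed to be a valid TargetTransformInfo reference and
  // `BB` a basic block whose instructions all have a valid parent.
  //
  // \code
  //   int Cost = 0;
  //   for (const Instruction &I : BB)
  //     Cost += TTI.getInstructionCost(
  //         &I, TargetTransformInfo::TCK_RecipThroughput);
  // \endcode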

  /// \brief Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };
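
  // A minimal usage sketch (illustrative, not part of the original header):
  // because costs are plain signed ints normalized to these constants, they
  // can be summed and compared directly. `UserCost` is assumed to hold a
  // value returned by one of the cost APIs below; the two branch targets are
  // hypothetical client decisions.
  //
  // \code
  //   if (UserCost <= TargetTransformInfo::TCC_Basic)
  //     foldSpeculatively();
  //   else if (UserCost >= TargetTransformInfo::TCC_Expensive)
  //     bailOut();
  // \endcode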

  /// \brief Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// \brief Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// \brief Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// \brief Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize is set to the jump table size only when \p SI is suitable for
  /// lowering to a jump table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) const;

  /// \brief Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be the result of
  /// transformations of the current operands. The number of operands on the
  /// list must equal the number of current operands the IR user has, and their
  /// order on the list must match the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// \brief This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }
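
  // A minimal usage sketch (illustrative, not part of the original header):
  // ask how expensive a user would be if one of its operands were replaced,
  // e.g. by a value a transformation is about to substitute. `TTI`, `U`, and
  // `NewOp` are assumed to be in scope; note the operand list must match U's
  // operand count and order.
  //
  // \code
  //   SmallVector<const Value *, 4> Ops(U->value_op_begin(), U->value_op_end());
  //   Ops[0] = NewOp; // hypothetical replacement for the first operand
  //   int CostAfter = TTI.getUserCost(U, Ops);
  // \endcode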

  /// \brief Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// \brief Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// \brief Returns true for the target-specific set of operations which
  /// produce a uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// \brief Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately, as a call is a single small instruction but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by a factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    /// Set the maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    /// Represents the number of instructions optimized away when a "back edge"
    /// becomes a "fall through" in the unrolled loop.
    /// For now we count a conditional branch on a backedge and a comparison
    /// feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations for loops with a low dynamic trip
    /// count.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
  };

  /// \brief Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
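
  // A minimal usage sketch (illustrative, not part of the original header):
  // this mirrors how an unrolling pass consults the hook. The caller fills
  // UP with generic defaults first, then lets the target adjust any field.
  // `TTI`, `L`, and `SE` are assumed to be in scope; the default values shown
  // are placeholders, not the real target-independent defaults.
  //
  // \code
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   UP.Threshold = 150;   // placeholder generic default
  //   UP.Partial = false;
  //   UP.Runtime = false;
  //   TTI.getUnrollingPreferences(L, SE, UP); // target tweaks the defaults
  // \endcode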

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// \brief Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// \brief Return true if the specified immediate is a legal add immediate,
  /// that is, the target has add instructions which can add a register with
  /// the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// \brief Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// \brief Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
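
  // A minimal usage sketch (illustrative, not part of the original header):
  // check whether a [reg + 4*reg] mode (no base global, no constant offset)
  // is legal for an i32 load/store. `TTI` and `Ctx` are assumed to be in
  // scope.
  //
  // \code
  //   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  //                                          /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/0,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);
  // \endcode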

  /// \brief Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// \brief Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// \brief Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// an addrspacecast to the generic AS for volatile loads/stores. The default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// \brief Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// \brief Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// \brief Return true if it is profitable to hoist an instruction in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  /// \brief Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// \brief Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// \brief Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// \brief Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// \brief Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// \brief Return true if the input function, which is cold at all call
  /// sites, should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// \brief Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;
  };
  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;

  /// \brief Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// \brief Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while, depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// \brief Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;
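
  // A minimal usage sketch (illustrative, not part of the original header):
  // ask whether a misaligned 64-bit access in the default address space is
  // allowed, and whether it is also fast. `TTI` and `Ctx` are assumed to be
  // in scope.
  //
  // \code
  //   bool Fast = false;
  //   if (TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/64,
  //                                          /*AddressSpace=*/0,
  //                                          /*Alignment=*/1, &Fast) &&
  //       Fast)
  //     ; // safe and cheap to emit the unaligned access
  // \endcode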

  /// \brief Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// \brief Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// \brief Return the expected cost for the given integer when optimising
  /// for size. This is different from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as Aarch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// \brief The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Alternate,        ///< Choose alternate elements from vector.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
  };
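
  // A minimal usage sketch (illustrative, not part of the original header):
  // cost of broadcasting element 0 of a <4 x float> across the vector.
  // `TTI` and `Ctx` are assumed to be in scope.
  //
  // \code
  //   Type *V4F32 = VectorType::get(Type::getFloatTy(Ctx), 4);
  //   int Cost = TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, V4F32);
  // \endcode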

  /// \brief Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// \brief Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D,   // The L1 data cache
    L2D,   // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How far before a load we should place the prefetch instruction.
  /// This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride.  This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead.  If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
  /// \p Args is an optional argument which holds the instruction operands'
  /// values so the TTI can analyze those values, searching for special
  /// cases or optimizations based on those values.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
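
  // A minimal usage sketch (illustrative, not part of the original header):
  // cost of a <4 x i32> multiply where the second operand is known to be a
  // uniform power-of-two constant. `TTI` and `Ctx` are assumed to be in scope.
  //
  // \code
  //   Type *V4I32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Mul, V4I32,
  //       TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue,
  //       TargetTransformInfo::OP_None,
  //       TargetTransformInfo::OP_PowerOf2);
  // \endcode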

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                 Type *CondTy = nullptr, const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr) const;
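
  // A minimal usage sketch (illustrative, not part of the original header):
  // cost of an aligned <8 x i16> load from the default address space.
  // `TTI` and `Ctx` are assumed to be in scope.
  //
  // \code
  //   Type *V8I16 = VectorType::get(Type::getInt16Ty(Ctx), 8);
  //   int Cost = TTI.getMemoryOpCost(Instruction::Load, V8I16,
  //                                  /*Alignment=*/16, /*AddressSpace=*/0);
  // \endcode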

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - the kind of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of a single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as an interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is the address space of the pointer.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) const;

  /// \brief Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) const;
  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned) const;
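
  // A minimal usage sketch (illustrative, not part of the original header):
  // cost of reducing a <4 x i32> to a scalar with integer add, using the
  // splitting (non-pairwise) form. `TTI` and `Ctx` are assumed to be in scope.
  //
  // \code
  //   Type *V4I32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
  //   int Cost = TTI.getArithmeticReductionCost(Instruction::Add, V4I32,
  //                                             /*IsPairwiseForm=*/false);
  // \endcode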
     802             : 
     803             :   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
     804             :   /// Three cases are handled: 1. scalar instruction 2. vector instruction
     805             :   /// 3. scalar instruction which is to be vectorized with VF.
     806             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     807             :                             ArrayRef<Value *> Args, FastMathFlags FMF,
     808             :                             unsigned VF = 1) const;
     809             : 
     810             :   /// \returns The cost of Intrinsic instructions. Types analysis only.
     811             :   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
     812             :   /// arguments and the return value will be computed based on types.
     813             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     814             :                             ArrayRef<Type *> Tys, FastMathFlags FMF,
     815             :                             unsigned ScalarizationCostPassed = UINT_MAX) const;
     816             : 
     817             :   /// \returns The cost of Call instructions.
     818             :   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
     819             : 
     820             :   /// \returns The number of pieces into which the provided type must be
     821             :   /// split during legalization. Zero is returned when the answer is unknown.
     822             :   unsigned getNumberOfParts(Type *Tp) const;
     823             : 
     824             :   /// \returns The cost of the address computation. For most targets this can
     825             :   /// be merged into the instruction's addressing mode. Some targets might
     826             :   /// want to distinguish between address computation for memory operations on
     827             :   /// vector types and on scalar types; such targets should override this
     828             :   /// function. The 'SE' parameter holds a pointer to the scalar evolution
     829             :   /// object, used to obtain the step value of \p Ptr when the stride is
     830             :   /// constant. The 'Ptr' parameter holds the SCEV of the access pointer.
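                     :   ///
                     :   /// Sketch (hypothetical: AccessTy, SE and PtrSCEV come from the caller;
                     :   /// the latter two may also be left null):
                     :   /// \code
                     :   ///   int AddrCost = TTI.getAddressComputationCost(AccessTy, &SE, PtrSCEV);
                     :   /// \endcode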
     831             :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
     832             :                                 const SCEV *Ptr = nullptr) const;
     833             : 
     834             :   /// \returns The cost, if any, of keeping values of the given types alive
     835             :   /// over a callsite.
     836             :   ///
     837             :   /// Some types may require the use of register classes that do not have
     838             :   /// any callee-saved registers, and so would require a spill and fill.
     839             :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
     840             : 
     841             :   /// \returns True if the intrinsic is a supported memory intrinsic. \p Info
     842             :   /// will contain additional information: whether the intrinsic may read from
     843             :   /// or write to memory, its volatility, and the pointer it accesses. \p Info
     844             :   /// is undefined if false is returned.
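                     :   ///
                     :   /// Typical pattern (sketch; II is an IntrinsicInst* from the caller):
                     :   /// \code
                     :   ///   MemIntrinsicInfo Info;
                     :   ///   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal) {
                     :   ///     // Treat II like a load/store of Info.PtrVal.
                     :   ///   }
                     :   /// \endcode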
     845             :   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
     846             : 
     847             :   /// \returns The maximum element size, in bytes, for an element
     848             :   /// unordered-atomic memory intrinsic.
     849             :   unsigned getAtomicMemIntrinsicMaxElementSize() const;
     850             : 
     851             :   /// \returns A value which is the result of the given memory intrinsic.  New
     852             :   /// instructions may be created to extract the result from the given intrinsic
     853             :   /// memory operation.  Returns nullptr if the target cannot create a result
     854             :   /// from the given intrinsic.
     855             :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
     856             :                                            Type *ExpectedType) const;
     857             : 
     858             :   /// \returns The type to use in a loop expansion of a memcpy call.
     859             :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
     860             :                                   unsigned SrcAlign, unsigned DestAlign) const;
     861             : 
     862             :   /// \param[out] OpsOut The operand types used to copy \p RemainingBytes.
     863             :   /// \param RemainingBytes The number of bytes to copy.
     864             :   ///
     865             :   /// Calculates the operand types to use when copying \p RemainingBytes of
     866             :   /// memory, where source and destination alignments are \p SrcAlign and
     867             :   /// \p DestAlign respectively.
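                     :   ///
                     :   /// Together with getMemcpyLoopLoweringType, a lowering sketch
                     :   /// (hypothetical: context Ctx, length value Length, 4-byte alignments,
                     :   /// and 7 residual bytes left after the main loop):
                     :   /// \code
                     :   ///   Type *LoopTy = TTI.getMemcpyLoopLoweringType(Ctx, Length,
                     :   ///                                                /*SrcAlign=*/4,
                     :   ///                                                /*DestAlign=*/4);
                     :   ///   SmallVector<Type *, 4> ResidualTys;
                     :   ///   TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
                     :   ///                                         /*RemainingBytes=*/7,
                     :   ///                                         /*SrcAlign=*/4,
                     :   ///                                         /*DestAlign=*/4);
                     :   /// \endcode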
     868             :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
     869             :                                          LLVMContext &Context,
     870             :                                          unsigned RemainingBytes,
     871             :                                          unsigned SrcAlign,
     872             :                                          unsigned DestAlign) const;
     873             : 
     874             :   /// \returns True if the two functions have compatible attributes for inlining
     875             :   /// purposes.
     876             :   bool areInlineCompatible(const Function *Caller,
     877             :                            const Function *Callee) const;
     878             : 
     879             :   /// \returns The bitwidth of the largest vector type that should be used to
     880             :   /// load/store in the given address space.
     881             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     882             : 
     883             :   /// \returns True if the load instruction is legal to vectorize.
     884             :   bool isLegalToVectorizeLoad(LoadInst *LI) const;
     885             : 
     886             :   /// \returns True if the store instruction is legal to vectorize.
     887             :   bool isLegalToVectorizeStore(StoreInst *SI) const;
     888             : 
     889             :   /// \returns True if it is legal to vectorize the given load chain.
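                     :   ///
                     :   /// Chain sketch (hypothetical 16-byte chain, 4-byte aligned, in
                     :   /// address space 0):
                     :   /// \code
                     :   ///   bool OK = TTI.isLegalToVectorizeLoadChain(/*ChainSizeInBytes=*/16,
                     :   ///                                             /*Alignment=*/4,
                     :   ///                                             /*AddrSpace=*/0);
                     :   /// \endcode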
     890             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     891             :                                    unsigned Alignment,
     892             :                                    unsigned AddrSpace) const;
     893             : 
     894             :   /// \returns True if it is legal to vectorize the given store chain.
     895             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     896             :                                     unsigned Alignment,
     897             :                                     unsigned AddrSpace) const;
     898             : 
     899             :   /// \returns The new vector factor value if the target doesn't support
     900             :   /// \p ChainSizeInBytes loads or has a better vector factor.
     901             :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
     902             :                                unsigned ChainSizeInBytes,
     903             :                                VectorType *VecTy) const;
     904             : 
     905             :   /// \returns The new vector factor value if the target doesn't support
     906             :   /// \p ChainSizeInBytes stores or has a better vector factor.
     907             :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
     908             :                                 unsigned ChainSizeInBytes,
     909             :                                 VectorType *VecTy) const;
     910             : 
     911             :   /// Flags describing the kind of vector reduction.
     912             :   struct ReductionFlags {
     913         178 :     ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
     914             :     bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
     915             :     bool IsSigned; ///< Whether the operation is a signed int reduction.
     916             :     bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
     917             :   };
     918             : 
     919             :   /// \returns True if the target wants to handle the given reduction idiom in
     920             :   /// the intrinsics form instead of the shuffle form.
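                     :   ///
                     :   /// Sketch for a plain integer add reduction (hypothetical; TTI and a
                     :   /// vector type VecTy are assumed; min/max reductions would additionally
                     :   /// set IsMaxOp/IsSigned):
                     :   /// \code
                     :   ///   TargetTransformInfo::ReductionFlags RF;
                     :   ///   bool UseIntrinsic =
                     :   ///       TTI.useReductionIntrinsic(Instruction::Add, VecTy, RF);
                     :   /// \endcode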
     921             :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
     922             :                              ReductionFlags Flags) const;
     923             : 
     924             :   /// \returns True if the target wants to expand the given reduction intrinsic
     925             :   /// into a shuffle sequence.
     926             :   bool shouldExpandReduction(const IntrinsicInst *II) const;
     927             :   /// @}
     928             : 
     929             : private:
     930             :   /// \brief Estimate the latency of the specified instruction.
     931             :   /// Returns 1 as the default value.
     932             :   int getInstructionLatency(const Instruction *I) const;
     933             : 
     934             :   /// \brief Returns the expected throughput cost of the instruction.
     935             :   /// Returns -1 if the cost is unknown.
     936             :   int getInstructionThroughput(const Instruction *I) const;
     937             : 
     938             :   /// \brief The abstract base class used to type erase specific TTI
     939             :   /// implementations.
     940             :   class Concept;
     941             : 
     942             :   /// \brief The template model for the base class which wraps a concrete
     943             :   /// implementation in a type erased interface.
     944             :   template <typename T> class Model;
     945             : 
     946             :   std::unique_ptr<Concept> TTIImpl;
     947             : };
     948             : 
     949             : class TargetTransformInfo::Concept {
     950             : public:
     951             :   virtual ~Concept() = 0;
     952             :   virtual const DataLayout &getDataLayout() const = 0;
     953             :   virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
     954             :   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
     955             :                          ArrayRef<const Value *> Operands) = 0;
     956             :   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
     957             :   virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
     958             :   virtual int getCallCost(const Function *F, int NumArgs) = 0;
     959             :   virtual int getCallCost(const Function *F,
     960             :                           ArrayRef<const Value *> Arguments) = 0;
     961             :   virtual unsigned getInliningThresholdMultiplier() = 0;
     962             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     963             :                                ArrayRef<Type *> ParamTys) = 0;
     964             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     965             :                                ArrayRef<const Value *> Arguments) = 0;
     966             :   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
     967             :                                                     unsigned &JTSize) = 0;
     968             :   virtual int
     969             :   getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
     970             :   virtual bool hasBranchDivergence() = 0;
     971             :   virtual bool isSourceOfDivergence(const Value *V) = 0;
     972             :   virtual bool isAlwaysUniform(const Value *V) = 0;
     973             :   virtual unsigned getFlatAddressSpace() = 0;
     974             :   virtual bool isLoweredToCall(const Function *F) = 0;
     975             :   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
     976             :                                        UnrollingPreferences &UP) = 0;
     977             :   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
     978             :   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
     979             :   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
     980             :                                      int64_t BaseOffset, bool HasBaseReg,
     981             :                                      int64_t Scale,
     982             :                                      unsigned AddrSpace,
     983             :                                      Instruction *I) = 0;
     984             :   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
     985             :                              TargetTransformInfo::LSRCost &C2) = 0;
     986             :   virtual bool canMacroFuseCmp() = 0;
     987             :   virtual bool isLegalMaskedStore(Type *DataType) = 0;
     988             :   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
     989             :   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
     990             :   virtual bool isLegalMaskedGather(Type *DataType) = 0;
     991             :   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
     992             :   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
     993             :   virtual bool prefersVectorizedAddressing() = 0;
     994             :   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
     995             :                                    int64_t BaseOffset, bool HasBaseReg,
     996             :                                    int64_t Scale, unsigned AddrSpace) = 0;
     997             :   virtual bool LSRWithInstrQueries() = 0;
     998             :   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
     999             :   virtual bool isProfitableToHoist(Instruction *I) = 0;
    1000             :   virtual bool isTypeLegal(Type *Ty) = 0;
    1001             :   virtual unsigned getJumpBufAlignment() = 0;
    1002             :   virtual unsigned getJumpBufSize() = 0;
    1003             :   virtual bool shouldBuildLookupTables() = 0;
    1004             :   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
    1005             :   virtual bool useColdCCForColdCall(Function &F) = 0;
    1006             :   virtual unsigned
    1007             :   getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
    1008             :   virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1009             :                                                     unsigned VF) = 0;
    1010             :   virtual bool supportsEfficientVectorElementLoadStore() = 0;
    1011             :   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
    1012             :   virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
    1013             :       bool IsZeroCmp) const = 0;
    1014             :   virtual bool enableInterleavedAccessVectorization() = 0;
    1015             :   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
    1016             :   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1017             :                                               unsigned BitWidth,
    1018             :                                               unsigned AddressSpace,
    1019             :                                               unsigned Alignment,
    1020             :                                               bool *Fast) = 0;
    1021             :   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
    1022             :   virtual bool haveFastSqrt(Type *Ty) = 0;
    1023             :   virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
    1024             :   virtual int getFPOpCost(Type *Ty) = 0;
    1025             :   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1026             :                                     Type *Ty) = 0;
    1027             :   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
    1028             :   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1029             :                             Type *Ty) = 0;
    1030             :   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1031             :                             Type *Ty) = 0;
    1032             :   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
    1033             :   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
    1034             :   virtual unsigned getMinVectorRegisterBitWidth() = 0;
    1035             :   virtual bool shouldConsiderAddressTypePromotion(
    1036             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
    1037             :   virtual unsigned getCacheLineSize() = 0;
    1038             :   virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
    1039             :   virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
    1040             :   virtual unsigned getPrefetchDistance() = 0;
    1041             :   virtual unsigned getMinPrefetchStride() = 0;
    1042             :   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
    1043             :   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
    1044             :   virtual unsigned
    1045             :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1046             :                          OperandValueKind Opd2Info,
    1047             :                          OperandValueProperties Opd1PropInfo,
    1048             :                          OperandValueProperties Opd2PropInfo,
    1049             :                          ArrayRef<const Value *> Args) = 0;
    1050             :   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1051             :                              Type *SubTp) = 0;
    1052             :   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1053             :                                const Instruction *I) = 0;
    1054             :   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
    1055             :                                        VectorType *VecTy, unsigned Index) = 0;
    1056             :   virtual int getCFInstrCost(unsigned Opcode) = 0;
    1057             :   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    1058             :                                 Type *CondTy, const Instruction *I) = 0;
    1059             :   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
    1060             :                                  unsigned Index) = 0;
    1061             :   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1062             :                               unsigned AddressSpace, const Instruction *I) = 0;
    1063             :   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    1064             :                                     unsigned Alignment,
    1065             :                                     unsigned AddressSpace) = 0;
    1066             :   virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1067             :                                      Value *Ptr, bool VariableMask,
    1068             :                                      unsigned Alignment) = 0;
    1069             :   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
    1070             :                                          unsigned Factor,
    1071             :                                          ArrayRef<unsigned> Indices,
    1072             :                                          unsigned Alignment,
    1073             :                                          unsigned AddressSpace) = 0;
    1074             :   virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1075             :                                          bool IsPairwiseForm) = 0;
    1076             :   virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1077             :                                      bool IsPairwiseForm, bool IsUnsigned) = 0;
    1078             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1079             :                       ArrayRef<Type *> Tys, FastMathFlags FMF,
    1080             :                       unsigned ScalarizationCostPassed) = 0;
    1081             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1082             :          ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
    1083             :   virtual int getCallInstrCost(Function *F, Type *RetTy,
    1084             :                                ArrayRef<Type *> Tys) = 0;
    1085             :   virtual unsigned getNumberOfParts(Type *Tp) = 0;
    1086             :   virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1087             :                                         const SCEV *Ptr) = 0;
    1088             :   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
    1089             :   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1090             :                                   MemIntrinsicInfo &Info) = 0;
    1091             :   virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
    1092             :   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1093             :                                                    Type *ExpectedType) = 0;
    1094             :   virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1095             :                                           unsigned SrcAlign,
    1096             :                                           unsigned DestAlign) const = 0;
    1097             :   virtual void getMemcpyLoopResidualLoweringType(
    1098             :       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    1099             :       unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
    1100             :   virtual bool areInlineCompatible(const Function *Caller,
    1101             :                                    const Function *Callee) const = 0;
    1102             :   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
    1103             :   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
    1104             :   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
    1105             :   virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1106             :                                            unsigned Alignment,
    1107             :                                            unsigned AddrSpace) const = 0;
    1108             :   virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1109             :                                             unsigned Alignment,
    1110             :                                             unsigned AddrSpace) const = 0;
    1111             :   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1112             :                                        unsigned ChainSizeInBytes,
    1113             :                                        VectorType *VecTy) const = 0;
    1114             :   virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1115             :                                         unsigned ChainSizeInBytes,
    1116             :                                         VectorType *VecTy) const = 0;
    1117             :   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1118             :                                      ReductionFlags) const = 0;
    1119             :   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
    1120             :   virtual int getInstructionLatency(const Instruction *I) = 0;
    1121             : };
    1122             : 
    1123             : template <typename T>
    1124             : class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
    1125             :   T Impl;
    1126             : 
    1127             : public:
    1128     2251817 :   Model(T Impl) : Impl(std::move(Impl)) {}
    1129     2251723 :   ~Model() override {}
    1130             : 
    1131           0 :   const DataLayout &getDataLayout() const override {
    1132           0 :     return Impl.getDataLayout();
    1133             :   }
    1134             : 
    1135           0 :   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    1136           0 :     return Impl.getOperationCost(Opcode, Ty, OpTy);
    1137             :   }
    1138       33093 :   int getGEPCost(Type *PointeeType, const Value *Ptr,
    1139             :                  ArrayRef<const Value *> Operands) override {
    1140       33093 :     return Impl.getGEPCost(PointeeType, Ptr, Operands);
    1141             :   }
    1142           0 :   int getExtCost(const Instruction *I, const Value *Src) override {
    1143           0 :     return Impl.getExtCost(I, Src);
    1144             :   }
    1145           0 :   int getCallCost(FunctionType *FTy, int NumArgs) override {
    1146           0 :     return Impl.getCallCost(FTy, NumArgs);
    1147             :   }
    1148           0 :   int getCallCost(const Function *F, int NumArgs) override {
    1149           0 :     return Impl.getCallCost(F, NumArgs);
    1150             :   }
    1151           0 :   int getCallCost(const Function *F,
    1152             :                   ArrayRef<const Value *> Arguments) override {
    1153           0 :     return Impl.getCallCost(F, Arguments);
    1154             :   }
    1155      236628 :   unsigned getInliningThresholdMultiplier() override {
    1156      236628 :     return Impl.getInliningThresholdMultiplier();
    1157             :   }
    1158           0 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1159             :                        ArrayRef<Type *> ParamTys) override {
    1160           0 :     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
    1161             :   }
    1162           4 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1163             :                        ArrayRef<const Value *> Arguments) override {
    1164           4 :     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
    1165             :   }
    1166     3520219 :   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    1167     3520219 :     return Impl.getUserCost(U, Operands);
    1168             :   }
    1169      100693 :   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
    1170      571813 :   bool isSourceOfDivergence(const Value *V) override {
    1171      571813 :     return Impl.isSourceOfDivergence(V);
    1172             :   }
    1173             : 
    1174      168124 :   bool isAlwaysUniform(const Value *V) override {
    1175      168124 :     return Impl.isAlwaysUniform(V);
    1176             :   }
    1177             : 
    1178       20268 :   unsigned getFlatAddressSpace() override {
    1179       20268 :     return Impl.getFlatAddressSpace();
    1180             :   }
    1181             : 
    1182      949378 :   bool isLoweredToCall(const Function *F) override {
    1183      949378 :     return Impl.isLoweredToCall(F);
    1184             :   }
    1185        6200 :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
    1186             :                                UnrollingPreferences &UP) override {
    1187        6200 :     return Impl.getUnrollingPreferences(L, SE, UP);
    1188             :   }
    1189       18101 :   bool isLegalAddImmediate(int64_t Imm) override {
    1190       18101 :     return Impl.isLegalAddImmediate(Imm);
    1191             :   }
    1192       23146 :   bool isLegalICmpImmediate(int64_t Imm) override {
    1193       23146 :     return Impl.isLegalICmpImmediate(Imm);
    1194             :   }
    1195      343590 :   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1196             :                              bool HasBaseReg, int64_t Scale,
    1197             :                              unsigned AddrSpace,
    1198             :                              Instruction *I) override {
    1199             :     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
    1200      343590 :                                       Scale, AddrSpace, I);
    1201             :   }
    1202       68933 :   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
    1203             :                      TargetTransformInfo::LSRCost &C2) override {
    1204       68933 :     return Impl.isLSRCostLess(C1, C2);
    1205             :   }
    1206       22760 :   bool canMacroFuseCmp() override {
    1207       22760 :     return Impl.canMacroFuseCmp();
    1208             :   }
    1209         145 :   bool isLegalMaskedStore(Type *DataType) override {
    1210         145 :     return Impl.isLegalMaskedStore(DataType);
    1211             :   }
    1212         259 :   bool isLegalMaskedLoad(Type *DataType) override {
    1213         259 :     return Impl.isLegalMaskedLoad(DataType);
    1214             :   }
    1215         276 :   bool isLegalMaskedScatter(Type *DataType) override {
    1216         276 :     return Impl.isLegalMaskedScatter(DataType);
    1217             :   }
    1218         762 :   bool isLegalMaskedGather(Type *DataType) override {
    1219         762 :     return Impl.isLegalMaskedGather(DataType);
    1220             :   }
    1221          32 :   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    1222          32 :     return Impl.hasDivRemOp(DataType, IsSigned);
    1223             :   }
    1224         555 :   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    1225         555 :     return Impl.hasVolatileVariant(I, AddrSpace);
    1226             :   }
    1227        1120 :   bool prefersVectorizedAddressing() override {
    1228        1120 :     return Impl.prefersVectorizedAddressing();
    1229             :   }
    1230       71140 :   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1231             :                            bool HasBaseReg, int64_t Scale,
    1232             :                            unsigned AddrSpace) override {
    1233             :     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
    1234       71140 :                                      Scale, AddrSpace);
    1235             :   }
    1236       41977 :   bool LSRWithInstrQueries() override {
    1237       41977 :     return Impl.LSRWithInstrQueries();
    1238             :   }
    1239       10375 :   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    1240       10375 :     return Impl.isTruncateFree(Ty1, Ty2);
    1241             :   }
    1242        3411 :   bool isProfitableToHoist(Instruction *I) override {
    1243        3411 :     return Impl.isProfitableToHoist(I);
    1244             :   }
    1245         646 :   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
    1246           0 :   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
    1247           0 :   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
    1248         444 :   bool shouldBuildLookupTables() override {
    1249         444 :     return Impl.shouldBuildLookupTables();
    1250             :   }
    1251        1102 :   bool shouldBuildLookupTablesForConstant(Constant *C) override {
    1252        1102 :     return Impl.shouldBuildLookupTablesForConstant(C);
    1253             :   }
    1254           2 :   bool useColdCCForColdCall(Function &F) override {
    1255           2 :     return Impl.useColdCCForColdCall(F);
    1256             :   }
    1257             : 
    1258         671 :   unsigned getScalarizationOverhead(Type *Ty, bool Insert,
    1259             :                                     bool Extract) override {
    1260         671 :     return Impl.getScalarizationOverhead(Ty, Insert, Extract);
    1261             :   }
    1262         710 :   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1263             :                                             unsigned VF) override {
    1264         710 :     return Impl.getOperandsScalarizationOverhead(Args, VF);
    1265             :   }
    1266             : 
    1267         348 :   bool supportsEfficientVectorElementLoadStore() override {
    1268         348 :     return Impl.supportsEfficientVectorElementLoadStore();
    1269             :   }
    1270             : 
    1271          61 :   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    1272          61 :     return Impl.enableAggressiveInterleaving(LoopHasReductions);
    1273             :   }
    1274      160046 :   const MemCmpExpansionOptions *enableMemCmpExpansion(
    1275             :       bool IsZeroCmp) const override {
    1276      160046 :     return Impl.enableMemCmpExpansion(IsZeroCmp);
    1277             :   }
    1278         857 :   bool enableInterleavedAccessVectorization() override {
    1279         857 :     return Impl.enableInterleavedAccessVectorization();
    1280             :   }
    1281         125 :   bool isFPVectorizationPotentiallyUnsafe() override {
    1282         125 :     return Impl.isFPVectorizationPotentiallyUnsafe();
    1283             :   }
    1284         907 :   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1285             :                                       unsigned BitWidth, unsigned AddressSpace,
    1286             :                                       unsigned Alignment, bool *Fast) override {
    1287             :     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
    1288         907 :                                                Alignment, Fast);
    1289             :   }
    1290        2339 :   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    1291        2339 :     return Impl.getPopcntSupport(IntTyWidthInBit);
    1292             :   }
    1293          58 :   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
    1294             : 
    1295          27 :   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    1296          27 :     return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
    1297             :   }
    1298             : 
    1299         704 :   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
    1300             : 
    1301         106 :   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1302             :                             Type *Ty) override {
    1303         106 :     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
    1304             :   }
    1305          84 :   int getIntImmCost(const APInt &Imm, Type *Ty) override {
    1306          84 :     return Impl.getIntImmCost(Imm, Ty);
    1307             :   }
    1308      545143 :   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1309             :                     Type *Ty) override {
    1310      545208 :     return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
    1311             :   }
    1312      139415 :   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1313             :                     Type *Ty) override {
    1314      139419 :     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
    1315             :   }
    1316      148148 :   unsigned getNumberOfRegisters(bool Vector) override {
    1317      148148 :     return Impl.getNumberOfRegisters(Vector);
    1318             :   }
    1319        9914 :   unsigned getRegisterBitWidth(bool Vector) const override {
    1320        9914 :     return Impl.getRegisterBitWidth(Vector);
    1321             :   }
    1322        8380 :   unsigned getMinVectorRegisterBitWidth() override {
    1323        8380 :     return Impl.getMinVectorRegisterBitWidth();
    1324             :   }
    1325       29194 :   bool shouldConsiderAddressTypePromotion(
    1326             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    1327             :     return Impl.shouldConsiderAddressTypePromotion(
    1328       29194 :         I, AllowPromotionWithoutCommonHeader);
    1329             :   }
    1330          51 :   unsigned getCacheLineSize() override {
    1331          51 :     return Impl.getCacheLineSize();
    1332             :   }
    1333           8 :   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
    1334           8 :     return Impl.getCacheSize(Level);
    1335             :   }
    1336          10 :   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
    1337          10 :     return Impl.getCacheAssociativity(Level);
    1338             :   }
    1339       19484 :   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
    1340          78 :   unsigned getMinPrefetchStride() override {
    1341          78 :     return Impl.getMinPrefetchStride();
    1342             :   }
    1343         223 :   unsigned getMaxPrefetchIterationsAhead() override {
    1344         223 :     return Impl.getMaxPrefetchIterationsAhead();
    1345             :   }
    1346        1877 :   unsigned getMaxInterleaveFactor(unsigned VF) override {
    1347        1877 :     return Impl.getMaxInterleaveFactor(VF);
    1348             :   }
    1349        1545 :   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
    1350             :                                             unsigned &JTSize) override {
    1351        1545 :     return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
    1352             :   }
    1353             :   unsigned
    1354      150764 :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1355             :                          OperandValueKind Opd2Info,
    1356             :                          OperandValueProperties Opd1PropInfo,
    1357             :                          OperandValueProperties Opd2PropInfo,
    1358             :                          ArrayRef<const Value *> Args) override {
    1359             :     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
    1360      150764 :                                        Opd1PropInfo, Opd2PropInfo, Args);
    1361             :   }
    1362        2430 :   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1363             :                      Type *SubTp) override {
    1364        2449 :     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
    1365             :   }
    1366        4924 :   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1367             :                        const Instruction *I) override {
    1368        4924 :     return Impl.getCastInstrCost(Opcode, Dst, Src, I);
    1369             :   }
    1370          20 :   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
    1371             :                                unsigned Index) override {
    1372          20 :     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
    1373             :   }
    1374        8722 :   int getCFInstrCost(unsigned Opcode) override {
    1375        8722 :     return Impl.getCFInstrCost(Opcode);
    1376             :   }
    1377        4299 :   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
    1378             :                          const Instruction *I) override {
    1379        4299 :     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
    1380             :   }
    1381       33769 :   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    1382       33769 :     return Impl.getVectorInstrCost(Opcode, Val, Index);
    1383             :   }
    1384      291964 :   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1385             :                       unsigned AddressSpace, const Instruction *I) override {
    1386      291964 :     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
    1387             :   }
    1388         126 :   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1389             :                             unsigned AddressSpace) override {
    1390         126 :     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
    1391             :   }
    1392         104 :   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1393             :                              Value *Ptr, bool VariableMask,
    1394             :                              unsigned Alignment) override {
    1395             :     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
    1396         104 :                                        Alignment);
    1397             :   }
    1398          67 :   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
    1399             :                                  ArrayRef<unsigned> Indices, unsigned Alignment,
    1400             :                                  unsigned AddressSpace) override {
    1401             :     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
    1402          67 :                                            Alignment, AddressSpace);
    1403             :   }
    1404         230 :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1405             :                                  bool IsPairwiseForm) override {
    1406         230 :     return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
    1407             :   }
    1408         888 :   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1409             :                              bool IsPairwiseForm, bool IsUnsigned) override {
    1410         888 :     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
    1411             :   }
    1412         955 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
    1413             :                FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    1414             :     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
    1415         955 :                                       ScalarizationCostPassed);
    1416             :   }
    1417        2292 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1418             :        ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    1419        2292 :     return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
    1420             :   }
    1421         491 :   int getCallInstrCost(Function *F, Type *RetTy,
    1422             :                        ArrayRef<Type *> Tys) override {
    1423         491 :     return Impl.getCallInstrCost(F, RetTy, Tys);
    1424             :   }
    1425       11956 :   unsigned getNumberOfParts(Type *Tp) override {
    1426       11956 :     return Impl.getNumberOfParts(Tp);
    1427             :   }
    1428        1196 :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1429             :                                 const SCEV *Ptr) override {
    1430        1196 :     return Impl.getAddressComputationCost(Ty, SE, Ptr);
    1431             :   }
    1432       18212 :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    1433       18212 :     return Impl.getCostOfKeepingLiveOverCall(Tys);
    1434             :   }
    1435      194877 :   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1436             :                           MemIntrinsicInfo &Info) override {
    1437      194877 :     return Impl.getTgtMemIntrinsic(Inst, Info);
    1438             :   }
    1439           9 :   unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    1440           9 :     return Impl.getAtomicMemIntrinsicMaxElementSize();
    1441             :   }
    1442          29 :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1443             :                                            Type *ExpectedType) override {
    1444          29 :     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
    1445             :   }
    1446          15 :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1447             :                                   unsigned SrcAlign,
    1448             :                                   unsigned DestAlign) const override {
    1449          15 :     return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
    1450             :   }
    1451           0 :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
    1452             :                                          LLVMContext &Context,
    1453             :                                          unsigned RemainingBytes,
    1454             :                                          unsigned SrcAlign,
    1455             :                                          unsigned DestAlign) const override {
    1456           0 :     Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
    1457             :                                            SrcAlign, DestAlign);
    1458           0 :   }
    1459      365457 :   bool areInlineCompatible(const Function *Caller,
    1460             :                            const Function *Callee) const override {
    1461      365457 :     return Impl.areInlineCompatible(Caller, Callee);
    1462             :   }
    1463       27773 :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    1464       27773 :     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
    1465             :   }
    1466       10983 :   bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    1467       10983 :     return Impl.isLegalToVectorizeLoad(LI);
    1468             :   }
    1469       15767 :   bool isLegalToVectorizeStore(StoreInst *SI) const override {
    1470       15767 :     return Impl.isLegalToVectorizeStore(SI);
    1471             :   }
    1472         901 :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1473             :                                    unsigned Alignment,
    1474             :                                    unsigned AddrSpace) const override {
    1475             :     return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
    1476         901 :                                             AddrSpace);
    1477             :   }
    1478         432 :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1479             :                                     unsigned Alignment,
    1480             :                                     unsigned AddrSpace) const override {
    1481             :     return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
    1482         432 :                                              AddrSpace);
    1483             :   }
    1484         897 :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1485             :                                unsigned ChainSizeInBytes,
    1486             :                                VectorType *VecTy) const override {
    1487         897 :     return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
    1488             :   }
    1489         380 :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1490             :                                 unsigned ChainSizeInBytes,
    1491             :                                 VectorType *VecTy) const override {
    1492         380 :     return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
    1493             :   }
    1494         276 :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1495             :                              ReductionFlags Flags) const override {
    1496         276 :     return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
    1497             :   }
    1498          44 :   bool shouldExpandReduction(const IntrinsicInst *II) const override {
    1499          44 :     return Impl.shouldExpandReduction(II);
    1500             :   }
    1501          11 :   int getInstructionLatency(const Instruction *I) override {
    1502          11 :     return Impl.getInstructionLatency(I);
    1503             :   }
    1504             : };
    1505             : 
    1506             : template <typename T>
    1507      529107 : TargetTransformInfo::TargetTransformInfo(T Impl)
    1508     2766724 :     : TTIImpl(new Model<T>(Impl)) {}
    1509             : 
    1510             : /// \brief Analysis pass providing the \c TargetTransformInfo.
    1511             : ///
    1512             : /// The core idea of the TargetIRAnalysis is to expose an interface through
    1513             : /// which LLVM targets can analyze and provide information about the middle
    1514             : /// end's target-independent IR. This supports use cases such as target-aware
    1515             : /// cost modeling of IR constructs.
    1516             : ///
    1517             : /// This is a function analysis because much of the cost modeling for targets
    1518             : /// is done in a subtarget specific way and LLVM supports compiling different
    1519             : /// functions targeting different subtargets in order to support runtime
    1520             : /// dispatch according to the observed subtarget.
    1521       60858 : class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
    1522             : public:
    1523             :   typedef TargetTransformInfo Result;
    1524             : 
    1525             :   /// \brief Default construct a target IR analysis.
    1526             :   ///
    1527             :   /// This will use the module's datalayout to construct a baseline
    1528             :   /// conservative TTI result.
    1529             :   TargetIRAnalysis();
    1530             : 
    1531             :   /// \brief Construct an IR analysis pass around a target-provided callback.
    1532             :   ///
    1533             :   /// The callback will be called with a particular function for which the TTI
    1534             :   /// is needed and must return a TTI object for that function.
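                     :   ///
                     :   /// A hypothetical wiring sketch (MyTTIImpl is an assumed target TTI
                     :   /// implementation class and MyTM a target machine captured by the
                     :   /// callback; neither name exists in LLVM):
                     :   /// \code
                     :   ///   TargetIRAnalysis TIRA([&](const Function &F) {
                     :   ///     return TargetTransformInfo(MyTTIImpl(&MyTM, F));
                     :   ///   });
                     :   /// \endcode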
    1535             :   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
    1536             : 
    1537             :   // Value semantics. We spell out the constructors for MSVC.
    1538             :   TargetIRAnalysis(const TargetIRAnalysis &Arg)
    1539             :       : TTICallback(Arg.TTICallback) {}
    1540             :   TargetIRAnalysis(TargetIRAnalysis &&Arg)
    1541             :       : TTICallback(std::move(Arg.TTICallback)) {}
    1542             :   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    1543             :     TTICallback = RHS.TTICallback;
    1544             :     return *this;
    1545             :   }
    1546             :   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    1547             :     TTICallback = std::move(RHS.TTICallback);
    1548             :     return *this;
    1549             :   }
    1550             : 
    1551             :   Result run(const Function &F, FunctionAnalysisManager &);
    1552             : 
    1553             : private:
    1554             :   friend AnalysisInfoMixin<TargetIRAnalysis>;
    1555             :   static AnalysisKey Key;
    1556             : 
    1557             :   /// \brief The callback used to produce a result.
    1558             :   ///
    1559             :   /// We use a completely opaque callback so that targets can provide whatever
    1560             :   /// mechanism they desire for constructing the TTI for a given function.
    1561             :   ///
    1562             :   /// FIXME: Should we really use std::function? It's relatively inefficient.
    1563             :   /// It might be possible to arrange for even stateful callbacks to outlive
    1564             :   /// the analysis and thus use a function_ref which would be lighter weight.
    1565             :   /// This may also be less error prone as the callback is likely to reference
    1566             :   /// the external TargetMachine, and that reference needs to never dangle.
    1567             :   std::function<Result(const Function &)> TTICallback;
    1568             : 
    1569             :   /// \brief Helper function used as the callback in the default constructor.
    1570             :   static Result getDefaultTTI(const Function &F);
    1571             : };
    1572             : 
    1573             : /// \brief Wrapper pass for TargetTransformInfo.
    1574             : ///
    1575             : /// This pass can be constructed from a TTI object which it stores internally
    1576             : /// and is queried by passes.
    1577      182574 : class TargetTransformInfoWrapperPass : public ImmutablePass {
    1578             :   TargetIRAnalysis TIRA;
    1579             :   Optional<TargetTransformInfo> TTI;
    1580             : 
    1581             :   virtual void anchor();
    1582             : 
    1583             : public:
    1584             :   static char ID;
    1585             : 
    1586             :   /// \brief We must provide a default constructor for the pass but it should
    1587             :   /// never be used.
    1588             :   ///
    1589             :   /// Use the constructor below or call one of the creation routines.
    1590             :   TargetTransformInfoWrapperPass();
    1591             : 
    1592             :   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1593             : 
    1594             :   TargetTransformInfo &getTTI(const Function &F);
    1595             : };
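                     : 
                     : // A usage sketch (not part of this header): a legacy pass that declared
                     : // TargetTransformInfoWrapperPass in its getAnalysisUsage() can fetch the
                     : // TTI for the function F it is processing:
                     : //
                     : //   TargetTransformInfo &TTI =
                     : //       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);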
    1596             : 
    1597             : /// \brief Create an analysis pass wrapper around a TTI object.
    1598             : ///
    1599             : /// This analysis pass just holds the TTI instance and makes it available to
    1600             : /// clients.
    1601             : ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1602             : 
    1603             : } // End llvm namespace
    1604             : 
    1605             : #endif

Generated by: LCOV version 1.13