//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
enum ID : unsigned;
}

class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class Loop;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class Type;
class User;
class Value;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) && !IsVolatile;
  }
};
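
// Illustrative sketch (not part of the interface): given a MemIntrinsicInfo
// filled in by a target hook (the variable `Info` below is hypothetical), an
// analysis could treat the matched intrinsic like an ordinary load:
//
//   MemIntrinsicInfo Info; // populated by a target hook
//   if (Info.PtrVal && Info.ReadMem && !Info.WriteMem && Info.isUnordered()) {
//     // Safe to reason about this intrinsic as a load from Info.PtrVal.
//   }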

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of an instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note that this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }
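
  // A hedged usage sketch (`TTI` and `BB` are hypothetical): summing the
  // reciprocal-throughput cost of every instruction in a basic block.
  //
  //   int BlockCost = 0;
  //   for (const Instruction &I : *BB)
  //     BlockCost += TTI.getInstructionCost(
  //         &I, TargetTransformInfo::TCK_RecipThroughput);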

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs, which
  /// works better with simple integral values. Thus this enum only provides
  /// constants. Also note that the returned costs are signed integers to make
  /// it natural to add, subtract, and test with zero (a common boundary
  /// condition). It is not expected that 2^32 is a realistic cost to be
  /// modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };
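
  // For example (a sketch; `TTI`, `Worklist`, and the budget are
  // hypothetical), clients commonly sum costs and compare the total against
  // a budget expressed in these constants:
  //
  //   int Cost = 0;
  //   for (const User *U : Worklist)
  //     Cost += TTI.getUserCost(U);
  //   bool CheapEnough = Cost <= 4 * TargetTransformInfo::TCC_Basic;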

  /// Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface,
  /// as analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize is set to the jump table size only when \p SI is suitable for
  /// a jump table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be the result of
  /// transformations of the current operands. The number of operands on the
  /// list must be equal to the number of operands the IR user currently has,
  /// and their order on the list must match the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }
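
  // Sketch of the two-argument form (`TTI`, `U`, and `NewOp` are
  // hypothetical): querying the cost of `U` as if its first operand were
  // replaced, keeping the operand count and order intact as required above.
  //
  //   SmallVector<const Value *, 4> Ops(U->value_op_begin(),
  //                                     U->value_op_end());
  //   Ops[0] = NewOp;
  //   int Cost = TTI.getUserCost(U, Ops);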

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// Returns true for the target-specific set of operations which produce a
  /// uniform result even when given non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%,
    /// then we boost the threshold by a factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    /// Set the maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    /// Represents the number of instructions optimized away when a "back edge"
    /// becomes a "fall through" in the unrolled loop.
    /// For now we count a conditional branch on a backedge and a comparison
    /// feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll to any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations for loops with a low dynamic trip
    /// count.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for the inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
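
  // Roughly how a target might adjust the defaults it is handed (a sketch;
  // `MyTTIImpl` and the chosen numbers are hypothetical, not taken from any
  // in-tree target):
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           UnrollingPreferences &UP) {
  //     UP.Partial = UP.Runtime = true; // allow partial/runtime unrolling,
  //     UP.PartialThreshold = 30;       // but keep unrolled bodies small.
  //   }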

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true from LSRWithInstrQueries(), \p I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
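
  // Example query (`TTI`, `Int32Ty`, and `AS` are hypothetical): is the mode
  // [BaseReg + 4 * IndexReg], with no global base and no constant offset,
  // legal for an i32 access?
  //
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/0,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4, AS);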

  /// Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;
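
  // A vectorizer-style legality check (a sketch; `TTI` and `VecTy` are
  // hypothetical) before emitting a masked gather:
  //
  //   if (TTI.isLegalMaskedGather(VecTy)) {
  //     // Emit a masked gather intrinsic.
  //   } else {
  //     // Fall back to scalarized loads.
  //   }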

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// an addrspacecast to the generic AS for volatile loads/stores. The default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12-bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;
  };
  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
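
  // Sketch of what a target hook might return (the static object is
  // hypothetical): permit expansion using 8-, 4-, 2-, and 1-byte loads,
  // widest first, matching the sort order required above.
  //
  //   static const TargetTransformInfo::MemCmpExpansionOptions Options = {
  //       {8, 4, 2, 1}};
  //   return &Options;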

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARMv7 NEON SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;
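
  // Example use of the `Fast` out-parameter (`TTI` and `Ctx` are
  // hypothetical):
  //
  //   bool Fast = false;
  //   if (TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/64,
  //                                          /*AddressSpace=*/0,
  //                                          /*Alignment=*/1, &Fast) &&
  //       Fast) {
  //     // An unaligned 64-bit access is both legal and cheap here.
  //   }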

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// Return the expected cost for the given integer immediate when optimising
  /// for size. This is different from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings would be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of a single source vector with
                         ///< any shuffle mask.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is a uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vector' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of the given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  unsigned getMinimumVF(unsigned ElemWidth) const;

  /// \return True if \p I should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels.
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely
    // between microarchitectures. Also, we currently do not have a use for
    // L3 cache size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
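
  // Both queries return llvm::Optional, making missing information explicit.
  // A sketch (`TTI` and `BlockBytes` are hypothetical):
  //
  //   if (Optional<unsigned> L1Size =
  //           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
  //     BlockBytes = std::min(BlockBytes, *L1Size);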

  /// \return How far ahead of a load we should place the prefetch instruction.
  /// This is currently measured in the number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride. This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead. If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// This is an approximation of the reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operand
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
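
  // Sketch (`TTI` and `VecTy` are hypothetical): costing a vector multiply
  // whose second operand is a uniform power-of-two constant (e.g. a splat of
  // 16), which many targets can lower to a cheap shift.
  //
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Mul, VecTy, TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue,
  //       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);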
     762             : 
     763             :   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
     764             :   /// The index and subtype parameters are used by the subvector insertion and
     765             :   /// extraction shuffle kinds.
     766             :   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
     767             :                      Type *SubTp = nullptr) const;
     768             : 
     769             :   /// \return The expected cost of cast instructions, such as bitcast, trunc,
     770             :   /// zext, etc. If there is an existing instruction that holds Opcode, it
     771             :   /// may be passed in the 'I' parameter.
     772             :   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     773             :                        const Instruction *I = nullptr) const;
     774             : 
     775             :   /// \return The expected cost of a sign- or zero-extended vector extract. Use
     776             :   /// -1 to indicate that there is no information about the index value.
     777             :   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
     778             :                                unsigned Index = -1) const;
     779             : 
     780             :   /// \return The expected cost of control-flow related instructions such as
     781             :   /// Phi, Ret, Br.
     782             :   int getCFInstrCost(unsigned Opcode) const;
     783             : 
     784             :   /// \returns The expected cost of compare and select instructions. If there
     785             :   /// is an existing instruction that holds Opcode, it may be passed in the
     786             :   /// 'I' parameter.
     787             :   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     788             :                  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
     789             : 
     790             :   /// \return The expected cost of vector Insert and Extract.
     791             :   /// Use -1 to indicate that there is no information on the index value.
     792             :   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
     793             : 
     794             :   /// \return The cost of Load and Store instructions.
     795             :   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
     796             :                       unsigned AddressSpace, const Instruction *I = nullptr) const;
     797             : 
     798             :   /// \return The cost of masked Load and Store instructions.
     799             :   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
     800             :                             unsigned AddressSpace) const;
     801             : 
     802             :   /// \return The cost of Gather or Scatter operation
     803             :   /// \p Opcode - is a type of memory access Load or Store
     804             :   /// \p DataTy - a vector type of the data to be loaded or stored
     805             :   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
     806             :   /// \p VariableMask - true when the memory access is predicated with a mask
     807             :   ///                   that is not a compile-time constant
     808             :   /// \p Alignment - alignment of single element
     809             :   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
     810             :                              bool VariableMask, unsigned Alignment) const;
     811             : 
     812             :   /// \return The cost of the interleaved memory operation.
     813             :   /// \p Opcode is the memory operation code
     814             :   /// \p VecTy is the vector type of the interleaved access.
     815             :   /// \p Factor is the interleave factor
     816             :   /// \p Indices is the indices for interleaved load members (as interleaved
     817             :   ///    load allows gaps)
     818             :   /// \p Alignment is the alignment of the memory operation
     819             :   /// \p AddressSpace is address space of the pointer.
     820             :   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
     821             :                                  ArrayRef<unsigned> Indices, unsigned Alignment,
     822             :                                  unsigned AddressSpace) const;
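                     :   // Example (editor's sketch): a factor-2 interleaved load of a wide
                     :   // <8 x i32> access where only member 0 is used (member 1 is a gap).
                     :   //
                     :   //   Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 8);
                     :   //   int C = TTI.getInterleavedMemoryOpCost(Instruction::Load, VecTy,
                     :   //                                          /*Factor=*/2, /*Indices=*/{0},
                     :   //                                          /*Alignment=*/4,
                     :   //                                          /*AddressSpace=*/0);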
     823             : 
     824             :   /// Calculate the cost of performing a vector reduction.
     825             :   ///
     826             :   /// This is the cost of reducing the vector value of type \p Ty to a scalar
     827             :   /// value using the operation denoted by \p Opcode. The form of the reduction
     828             :   /// can either be a pairwise reduction or a reduction that splits the vector
     829             :   /// at every reduction level.
     830             :   ///
     831             :   /// Pairwise:
     832             :   ///  (v0, v1, v2, v3)
     833             :   ///  ((v0+v1), (v2+v3), undef, undef)
     834             :   /// Split:
     835             :   ///  (v0, v1, v2, v3)
     836             :   ///  ((v0+v2), (v1+v3), undef, undef)
     837             :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
     838             :                                  bool IsPairwiseForm) const;
     839             :   int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
     840             :                              bool IsUnsigned) const;
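                     :   // Example (editor's sketch): the split-form cost of summing a <4 x i32>
                     :   // vector down to a scalar; TTI and Ctx are assumed as above.
                     :   //
                     :   //   Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
                     :   //   int C = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
                     :   //                                          /*IsPairwiseForm=*/false);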
     841             : 
     842             :   /// \returns The cost of Intrinsic instructions, analyzing the real
     843             :   /// arguments. Three cases are handled: 1. a scalar instruction, 2. a vector
     844             :   /// instruction, and 3. a scalar instruction to be vectorized with factor VF.
     845             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     846             :                             ArrayRef<Value *> Args, FastMathFlags FMF,
     847             :                             unsigned VF = 1) const;
     848             : 
     849             :   /// \returns The cost of Intrinsic instructions, analyzing types only.
     850             :   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
     851             :   /// arguments and the return value will be computed based on types.
     852             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     853             :                             ArrayRef<Type *> Tys, FastMathFlags FMF,
     854             :                             unsigned ScalarizationCostPassed = UINT_MAX) const;
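                     :   // Example (editor's sketch) for the type-based overload: estimating a
                     :   // scalar llvm.sqrt.f32 call with default fast-math flags.
                     :   //
                     :   //   Type *FloatTy = Type::getFloatTy(Ctx);
                     :   //   int C = TTI.getIntrinsicInstrCost(Intrinsic::sqrt, FloatTy, {FloatTy},
                     :   //                                     FastMathFlags());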
     855             : 
     856             :   /// \returns The cost of Call instructions.
     857             :   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
     858             : 
     859             :   /// \returns The number of pieces into which the provided type must be
     860             :   /// split during legalization. Zero is returned when the answer is unknown.
     861             :   unsigned getNumberOfParts(Type *Tp) const;
     862             : 
     863             :   /// \returns The cost of the address computation. For most targets this can be
     864             :   /// merged into the instruction indexing mode. Some targets might want to
     865             :   /// distinguish between address computation for memory operations on vector
     866             :   /// types and scalar types. Such targets should override this function.
     867             :   /// The 'SE' parameter holds a pointer to the ScalarEvolution object, which
     868             :   /// is used to get the step value of 'Ptr' in the case of a constant stride.
     869             :   /// The 'Ptr' parameter holds the SCEV of the access pointer.
     870             :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
     871             :                                 const SCEV *Ptr = nullptr) const;
     872             : 
     873             :   /// \returns The cost, if any, of keeping values of the given types alive
     874             :   /// over a callsite.
     875             :   ///
     876             :   /// Some types may require the use of register classes that do not have
     877             :   /// any callee-saved registers, and so would require a spill and fill.
     878             :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
     879             : 
     880             :   /// \returns True if the intrinsic is a supported memory intrinsic. \p Info
     881             :   /// will contain additional information: whether the intrinsic may read from
     882             :   /// or write to memory, its volatility, and the pointer it accesses. \p Info
     883             :   /// is undefined if false is returned.
     884             :   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
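                     :   // Example (editor's sketch): treating a recognized target memory
                     :   // intrinsic as an ordinary memory access. Assumes an IntrinsicInst *II.
                     :   //
                     :   //   MemIntrinsicInfo Info;
                     :   //   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal) {
                     :   //     // Analyze as a load/store through Info.PtrVal.
                     :   //   }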
     885             : 
     886             :   /// \returns The maximum element size, in bytes, for an element
     887             :   /// unordered-atomic memory intrinsic.
     888             :   unsigned getAtomicMemIntrinsicMaxElementSize() const;
     889             : 
     890             :   /// \returns A value which is the result of the given memory intrinsic.  New
     891             :   /// instructions may be created to extract the result from the given intrinsic
     892             :   /// memory operation.  Returns nullptr if the target cannot create a result
     893             :   /// from the given intrinsic.
     894             :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
     895             :                                            Type *ExpectedType) const;
     896             : 
     897             :   /// \returns The type to use in a loop expansion of a memcpy call.
     898             :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
     899             :                                   unsigned SrcAlign, unsigned DestAlign) const;
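                     :   // Example (editor's sketch): choosing the per-iteration copy type when
                     :   // expanding a memcpy. Assumes Ctx and a Value *Len holding the length.
                     :   //
                     :   //   Type *CopyTy = TTI.getMemcpyLoopLoweringType(Ctx, Len, /*SrcAlign=*/4,
                     :   //                                                /*DestAlign=*/4);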
     900             : 
     901             :   /// \param[out] OpsOut The operand types to use to copy \p RemainingBytes.
     902             :   /// \param RemainingBytes The number of bytes to copy.
     903             :   ///
     904             :   /// Calculates the operand types to use when copying \p RemainingBytes of
     905             :   /// memory, where source and destination alignments are \p SrcAlign and
     906             :   /// \p DestAlign respectively.
     907             :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
     908             :                                          LLVMContext &Context,
     909             :                                          unsigned RemainingBytes,
     910             :                                          unsigned SrcAlign,
     911             :                                          unsigned DestAlign) const;
     912             : 
     913             :   /// \returns True if the two functions have compatible attributes for inlining
     914             :   /// purposes.
     915             :   bool areInlineCompatible(const Function *Caller,
     916             :                            const Function *Callee) const;
     917             : 
     918             :   /// The type of load/store indexing.
     919             :   enum MemIndexedMode {
     920             :     MIM_Unindexed,  ///< No indexing.
     921             :     MIM_PreInc,     ///< Pre-incrementing.
     922             :     MIM_PreDec,     ///< Pre-decrementing.
     923             :     MIM_PostInc,    ///< Post-incrementing.
     924             :     MIM_PostDec     ///< Post-decrementing.
     925             :   };
     926             : 
     927             :   /// \returns True if the specified indexed load for the given type is legal.
     928             :   bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
     929             : 
     930             :   /// \returns True if the specified indexed store for the given type is legal.
     931             :   bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
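                     :   // Example (editor's sketch): checking whether a post-incrementing i32
                     :   // load is legal on the target; TTI and Ctx are assumed as above.
                     :   //
                     :   //   bool Legal = TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc,
                     :   //                                       Type::getInt32Ty(Ctx));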
     932             : 
     933             :   /// \returns The bitwidth of the largest vector type that should be used to
     934             :   /// load/store in the given address space.
     935             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     936             : 
     937             :   /// \returns True if the load instruction is legal to vectorize.
     938             :   bool isLegalToVectorizeLoad(LoadInst *LI) const;
     939             : 
     940             :   /// \returns True if the store instruction is legal to vectorize.
     941             :   bool isLegalToVectorizeStore(StoreInst *SI) const;
     942             : 
     943             :   /// \returns True if it is legal to vectorize the given load chain.
     944             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     945             :                                    unsigned Alignment,
     946             :                                    unsigned AddrSpace) const;
     947             : 
     948             :   /// \returns True if it is legal to vectorize the given store chain.
     949             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     950             :                                     unsigned Alignment,
     951             :                                     unsigned AddrSpace) const;
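                     :   // Example (editor's sketch): a load/store vectorizer might test a
                     :   // 16-byte chain at 4-byte alignment in address space 0 like this.
                     :   //
                     :   //   bool OK = TTI.isLegalToVectorizeLoadChain(/*ChainSizeInBytes=*/16,
                     :   //                                             /*Alignment=*/4,
                     :   //                                             /*AddrSpace=*/0);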
     952             : 
     953             :   /// \returns The new vector factor value if the target doesn't support \p
     954             :   /// ChainSizeInBytes loads or has a better vector factor.
     955             :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
     956             :                                unsigned ChainSizeInBytes,
     957             :                                VectorType *VecTy) const;
     958             : 
     959             :   /// \returns The new vector factor value if the target doesn't support \p
     960             :   /// ChainSizeInBytes stores or has a better vector factor.
     961             :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
     962             :                                 unsigned ChainSizeInBytes,
     963             :                                 VectorType *VecTy) const;
     964             : 
     965             :   /// Flags describing the kind of vector reduction.
     966             :   struct ReductionFlags {
     967         183 :     ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
     968             :     bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
     969             :     bool IsSigned; ///< Whether the operation is a signed int reduction.
     970             :     bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
     971             :   };
     972             : 
     973             :   /// \returns True if the target wants to handle the given reduction idiom in
     974             :   /// the intrinsic form instead of the shuffle form.
     975             :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
     976             :                              ReductionFlags Flags) const;
     977             : 
     978             :   /// \returns True if the target wants to expand the given reduction intrinsic
     979             :   /// into a shuffle sequence.
     980             :   bool shouldExpandReduction(const IntrinsicInst *II) const;
     981             :   /// @}
     982             : 
     983             : private:
     984             :   /// Estimate the latency of the specified instruction.
     985             :   /// Returns 1 as the default value.
     986             :   int getInstructionLatency(const Instruction *I) const;
     987             : 
     988             :   /// Returns the expected throughput cost of the instruction.
     989             :   /// Returns -1 if the cost is unknown.
     990             :   int getInstructionThroughput(const Instruction *I) const;
     991             : 
     992             :   /// The abstract base class used to type erase specific TTI
     993             :   /// implementations.
     994             :   class Concept;
     995             : 
     996             :   /// The template model for the base class which wraps a concrete
     997             :   /// implementation in a type erased interface.
     998             :   template <typename T> class Model;
     999             : 
    1000             :   std::unique_ptr<Concept> TTIImpl;
    1001             : };
    1002             : 
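                     : // Editor's note: Concept/Model below is the classic type-erasure idiom. A
                     : // minimal, self-contained sketch of the same pattern (illustration only,
                     : // not LLVM code):
                     : //
                     : //   struct Concept {
                     : //     virtual ~Concept() = default;
                     : //     virtual int getCost() = 0;
                     : //   };
                     : //   template <typename T> struct Model final : Concept {
                     : //     T Impl;
                     : //     Model(T I) : Impl(std::move(I)) {}
                     : //     int getCost() override { return Impl.getCost(); }
                     : //   };
                     : //
                     : // Any type with a getCost() member can then be stored behind a
                     : // std::unique_ptr<Concept> without sharing a base class, which is exactly
                     : // how TTIImpl erases the concrete target implementation.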
    1003             : class TargetTransformInfo::Concept {
    1004             : public:
    1005             :   virtual ~Concept() = 0;
    1006             :   virtual const DataLayout &getDataLayout() const = 0;
    1007             :   virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
    1008             :   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
    1009             :                          ArrayRef<const Value *> Operands) = 0;
    1010             :   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
    1011             :   virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
    1012             :   virtual int getCallCost(const Function *F, int NumArgs) = 0;
    1013             :   virtual int getCallCost(const Function *F,
    1014             :                           ArrayRef<const Value *> Arguments) = 0;
    1015             :   virtual unsigned getInliningThresholdMultiplier() = 0;
    1016             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1017             :                                ArrayRef<Type *> ParamTys) = 0;
    1018             :   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1019             :                                ArrayRef<const Value *> Arguments) = 0;
    1020             :   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
    1021             :                                                     unsigned &JTSize) = 0;
    1022             :   virtual int
    1023             :   getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
    1024             :   virtual bool hasBranchDivergence() = 0;
    1025             :   virtual bool isSourceOfDivergence(const Value *V) = 0;
    1026             :   virtual bool isAlwaysUniform(const Value *V) = 0;
    1027             :   virtual unsigned getFlatAddressSpace() = 0;
    1028             :   virtual bool isLoweredToCall(const Function *F) = 0;
    1029             :   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
    1030             :                                        UnrollingPreferences &UP) = 0;
    1031             :   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
    1032             :   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
    1033             :   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
    1034             :                                      int64_t BaseOffset, bool HasBaseReg,
    1035             :                                      int64_t Scale,
    1036             :                                      unsigned AddrSpace,
    1037             :                                      Instruction *I) = 0;
    1038             :   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
    1039             :                              TargetTransformInfo::LSRCost &C2) = 0;
    1040             :   virtual bool canMacroFuseCmp() = 0;
    1041             :   virtual bool shouldFavorPostInc() const = 0;
    1042             :   virtual bool isLegalMaskedStore(Type *DataType) = 0;
    1043             :   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
    1044             :   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
    1045             :   virtual bool isLegalMaskedGather(Type *DataType) = 0;
    1046             :   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
    1047             :   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
    1048             :   virtual bool prefersVectorizedAddressing() = 0;
    1049             :   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
    1050             :                                    int64_t BaseOffset, bool HasBaseReg,
    1051             :                                    int64_t Scale, unsigned AddrSpace) = 0;
    1052             :   virtual bool LSRWithInstrQueries() = 0;
    1053             :   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
    1054             :   virtual bool isProfitableToHoist(Instruction *I) = 0;
    1055             :   virtual bool useAA() = 0;
    1056             :   virtual bool isTypeLegal(Type *Ty) = 0;
    1057             :   virtual unsigned getJumpBufAlignment() = 0;
    1058             :   virtual unsigned getJumpBufSize() = 0;
    1059             :   virtual bool shouldBuildLookupTables() = 0;
    1060             :   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
    1061             :   virtual bool useColdCCForColdCall(Function &F) = 0;
    1062             :   virtual unsigned
    1063             :   getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
    1064             :   virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1065             :                                                     unsigned VF) = 0;
    1066             :   virtual bool supportsEfficientVectorElementLoadStore() = 0;
    1067             :   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
    1068             :   virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
    1069             :       bool IsZeroCmp) const = 0;
    1070             :   virtual bool enableInterleavedAccessVectorization() = 0;
    1071             :   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
    1072             :   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1073             :                                               unsigned BitWidth,
    1074             :                                               unsigned AddressSpace,
    1075             :                                               unsigned Alignment,
    1076             :                                               bool *Fast) = 0;
    1077             :   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
    1078             :   virtual bool haveFastSqrt(Type *Ty) = 0;
    1079             :   virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
    1080             :   virtual int getFPOpCost(Type *Ty) = 0;
    1081             :   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1082             :                                     Type *Ty) = 0;
    1083             :   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
    1084             :   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1085             :                             Type *Ty) = 0;
    1086             :   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1087             :                             Type *Ty) = 0;
    1088             :   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
    1089             :   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
    1090             :   virtual unsigned getMinVectorRegisterBitWidth() = 0;
    1091             :   virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
    1092             :   virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
    1093             :   virtual bool shouldConsiderAddressTypePromotion(
    1094             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
    1095             :   virtual unsigned getCacheLineSize() = 0;
    1096             :   virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
    1097             :   virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
    1098             :   virtual unsigned getPrefetchDistance() = 0;
    1099             :   virtual unsigned getMinPrefetchStride() = 0;
    1100             :   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
    1101             :   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
    1102             :   virtual unsigned
    1103             :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1104             :                          OperandValueKind Opd2Info,
    1105             :                          OperandValueProperties Opd1PropInfo,
    1106             :                          OperandValueProperties Opd2PropInfo,
    1107             :                          ArrayRef<const Value *> Args) = 0;
    1108             :   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1109             :                              Type *SubTp) = 0;
    1110             :   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1111             :                                const Instruction *I) = 0;
    1112             :   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
    1113             :                                        VectorType *VecTy, unsigned Index) = 0;
    1114             :   virtual int getCFInstrCost(unsigned Opcode) = 0;
    1115             :   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    1116             :                                 Type *CondTy, const Instruction *I) = 0;
    1117             :   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
    1118             :                                  unsigned Index) = 0;
    1119             :   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1120             :                               unsigned AddressSpace, const Instruction *I) = 0;
    1121             :   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    1122             :                                     unsigned Alignment,
    1123             :                                     unsigned AddressSpace) = 0;
    1124             :   virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1125             :                                      Value *Ptr, bool VariableMask,
    1126             :                                      unsigned Alignment) = 0;
    1127             :   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
    1128             :                                          unsigned Factor,
    1129             :                                          ArrayRef<unsigned> Indices,
    1130             :                                          unsigned Alignment,
    1131             :                                          unsigned AddressSpace) = 0;
    1132             :   virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1133             :                                          bool IsPairwiseForm) = 0;
    1134             :   virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1135             :                                      bool IsPairwiseForm, bool IsUnsigned) = 0;
    1136             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1137             :                       ArrayRef<Type *> Tys, FastMathFlags FMF,
    1138             :                       unsigned ScalarizationCostPassed) = 0;
    1139             :   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1140             :          ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
    1141             :   virtual int getCallInstrCost(Function *F, Type *RetTy,
    1142             :                                ArrayRef<Type *> Tys) = 0;
    1143             :   virtual unsigned getNumberOfParts(Type *Tp) = 0;
    1144             :   virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1145             :                                         const SCEV *Ptr) = 0;
    1146             :   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
    1147             :   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1148             :                                   MemIntrinsicInfo &Info) = 0;
    1149             :   virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
    1150             :   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1151             :                                                    Type *ExpectedType) = 0;
    1152             :   virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1153             :                                           unsigned SrcAlign,
    1154             :                                           unsigned DestAlign) const = 0;
    1155             :   virtual void getMemcpyLoopResidualLoweringType(
    1156             :       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    1157             :       unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
    1158             :   virtual bool areInlineCompatible(const Function *Caller,
    1159             :                                    const Function *Callee) const = 0;
    1160             :   virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
    1161             :   virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
    1162             :   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
    1163             :   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
    1164             :   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
    1165             :   virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1166             :                                            unsigned Alignment,
    1167             :                                            unsigned AddrSpace) const = 0;
    1168             :   virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1169             :                                             unsigned Alignment,
    1170             :                                             unsigned AddrSpace) const = 0;
    1171             :   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1172             :                                        unsigned ChainSizeInBytes,
    1173             :                                        VectorType *VecTy) const = 0;
    1174             :   virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1175             :                                         unsigned ChainSizeInBytes,
    1176             :                                         VectorType *VecTy) const = 0;
    1177             :   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1178             :                                      ReductionFlags) const = 0;
    1179             :   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
    1180             :   virtual int getInstructionLatency(const Instruction *I) = 0;
    1181             : };
    1182             : 
    1183             : template <typename T>
    1184             : class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
    1185             :   T Impl;
    1186             : 
    1187             : public:
    1188     2893770 :   Model(T Impl) : Impl(std::move(Impl)) {}
    1189     3739822 :   ~Model() override {}
    1190             : 
    1191           0 :   const DataLayout &getDataLayout() const override {
    1192           0 :     return Impl.getDataLayout();
    1193             :   }
    1194             : 
    1195           0 :   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    1196           0 :     return Impl.getOperationCost(Opcode, Ty, OpTy);
    1197             :   }
    1198       34944 :   int getGEPCost(Type *PointeeType, const Value *Ptr,
    1199             :                  ArrayRef<const Value *> Operands) override {
    1200       34944 :     return Impl.getGEPCost(PointeeType, Ptr, Operands);
    1201             :   }
    1202           0 :   int getExtCost(const Instruction *I, const Value *Src) override {
    1203           0 :     return Impl.getExtCost(I, Src);
    1204             :   }
    1205           0 :   int getCallCost(FunctionType *FTy, int NumArgs) override {
    1206           0 :     return Impl.getCallCost(FTy, NumArgs);
    1207             :   }
    1208           0 :   int getCallCost(const Function *F, int NumArgs) override {
    1209           0 :     return Impl.getCallCost(F, NumArgs);
    1210             :   }
    1211           0 :   int getCallCost(const Function *F,
    1212             :                   ArrayRef<const Value *> Arguments) override {
    1213           0 :     return Impl.getCallCost(F, Arguments);
    1214             :   }
    1215      246066 :   unsigned getInliningThresholdMultiplier() override {
    1216      246066 :     return Impl.getInliningThresholdMultiplier();
    1217             :   }
    1218           0 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1219             :                        ArrayRef<Type *> ParamTys) override {
    1220           0 :     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
    1221             :   }
    1222           4 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1223             :                        ArrayRef<const Value *> Arguments) override {
    1224           4 :     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
    1225             :   }
    1226     3632224 :   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    1227     3632224 :     return Impl.getUserCost(U, Operands);
    1228             :   }
    1229     1712786 :   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
    1230     1325098 :   bool isSourceOfDivergence(const Value *V) override {
    1231     1325098 :     return Impl.isSourceOfDivergence(V);
    1232             :   }
    1233             : 
    1234      234682 :   bool isAlwaysUniform(const Value *V) override {
    1235      234682 :     return Impl.isAlwaysUniform(V);
    1236             :   }
    1237             : 
    1238       21607 :   unsigned getFlatAddressSpace() override {
    1239       39847 :     return Impl.getFlatAddressSpace();
    1240             :   }
    1241             : 
    1242      976471 :   bool isLoweredToCall(const Function *F) override {
    1243      976471 :     return Impl.isLoweredToCall(F);
    1244             :   }
    1245        6507 :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
    1246             :                                UnrollingPreferences &UP) override {
    1247        6507 :     return Impl.getUnrollingPreferences(L, SE, UP);
    1248             :   }
    1249       25365 :   bool isLegalAddImmediate(int64_t Imm) override {
    1250       25365 :     return Impl.isLegalAddImmediate(Imm);
    1251             :   }
    1252       26876 :   bool isLegalICmpImmediate(int64_t Imm) override {
    1253       26876 :     return Impl.isLegalICmpImmediate(Imm);
    1254             :   }
    1255      400715 :   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1256             :                              bool HasBaseReg, int64_t Scale,
    1257             :                              unsigned AddrSpace,
    1258             :                              Instruction *I) override {
    1259             :     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
    1260      400715 :                                       Scale, AddrSpace, I);
    1261             :   }
    1262       97388 :   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
    1263             :                      TargetTransformInfo::LSRCost &C2) override {
    1264       97388 :     return Impl.isLSRCostLess(C1, C2);
    1265             :   }
    1266       25664 :   bool canMacroFuseCmp() override {
    1267       25664 :     return Impl.canMacroFuseCmp();
    1268             :   }
    1269      201427 :   bool shouldFavorPostInc() const override {
    1270      201427 :     return Impl.shouldFavorPostInc();
    1271             :   }
    1272         446 :   bool isLegalMaskedStore(Type *DataType) override {
    1273         446 :     return Impl.isLegalMaskedStore(DataType);
    1274             :   }
    1275         487 :   bool isLegalMaskedLoad(Type *DataType) override {
    1276         487 :     return Impl.isLegalMaskedLoad(DataType);
    1277             :   }
    1278         596 :   bool isLegalMaskedScatter(Type *DataType) override {
    1279         596 :     return Impl.isLegalMaskedScatter(DataType);
    1280             :   }
    1281         719 :   bool isLegalMaskedGather(Type *DataType) override {
    1282         719 :     return Impl.isLegalMaskedGather(DataType);
    1283             :   }
    1284          35 :   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    1285          35 :     return Impl.hasDivRemOp(DataType, IsSigned);
    1286             :   }
    1287         592 :   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    1288         592 :     return Impl.hasVolatileVariant(I, AddrSpace);
    1289             :   }
    1290        1158 :   bool prefersVectorizedAddressing() override {
    1291        1158 :     return Impl.prefersVectorizedAddressing();
    1292             :   }
    1293       86434 :   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1294             :                            bool HasBaseReg, int64_t Scale,
    1295             :                            unsigned AddrSpace) override {
    1296             :     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
    1297       86434 :                                      Scale, AddrSpace);
    1298             :   }
    1299       50163 :   bool LSRWithInstrQueries() override {
    1300       50163 :     return Impl.LSRWithInstrQueries();
    1301             :   }
    1302       19776 :   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    1303       19776 :     return Impl.isTruncateFree(Ty1, Ty2);
    1304             :   }
    1305        3521 :   bool isProfitableToHoist(Instruction *I) override {
    1306        3521 :     return Impl.isProfitableToHoist(I);
    1307             :   }
    1308        1118 :   bool useAA() override { return Impl.useAA(); }
    1309         679 :   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
    1310           0 :   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
    1311           0 :   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
    1312         426 :   bool shouldBuildLookupTables() override {
    1313         426 :     return Impl.shouldBuildLookupTables();
    1314             :   }
    1315        1102 :   bool shouldBuildLookupTablesForConstant(Constant *C) override {
    1316        1102 :     return Impl.shouldBuildLookupTablesForConstant(C);
    1317             :   }
    1318           2 :   bool useColdCCForColdCall(Function &F) override {
    1319           2 :     return Impl.useColdCCForColdCall(F);
    1320             :   }
    1321             : 
    1322         745 :   unsigned getScalarizationOverhead(Type *Ty, bool Insert,
    1323             :                                     bool Extract) override {
    1324         745 :     return Impl.getScalarizationOverhead(Ty, Insert, Extract);
    1325             :   }
    1326         783 :   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1327             :                                             unsigned VF) override {
    1328         783 :     return Impl.getOperandsScalarizationOverhead(Args, VF);
    1329             :   }
    1330             : 
    1331         389 :   bool supportsEfficientVectorElementLoadStore() override {
    1332         389 :     return Impl.supportsEfficientVectorElementLoadStore();
    1333             :   }
    1334             : 
    1335          65 :   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    1336          65 :     return Impl.enableAggressiveInterleaving(LoopHasReductions);
    1337             :   }
    1338      184776 :   const MemCmpExpansionOptions *enableMemCmpExpansion(
    1339             :       bool IsZeroCmp) const override {
    1340      184776 :     return Impl.enableMemCmpExpansion(IsZeroCmp);
    1341             :   }
    1342         858 :   bool enableInterleavedAccessVectorization() override {
    1343         858 :     return Impl.enableInterleavedAccessVectorization();
    1344             :   }
    1345         150 :   bool isFPVectorizationPotentiallyUnsafe() override {
    1346         150 :     return Impl.isFPVectorizationPotentiallyUnsafe();
    1347             :   }
    1348        7931 :   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1349             :                                       unsigned BitWidth, unsigned AddressSpace,
    1350             :                                       unsigned Alignment, bool *Fast) override {
    1351             :     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
    1352        7931 :                                                Alignment, Fast);
    1353             :   }
    1354        2417 :   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    1355        2417 :     return Impl.getPopcntSupport(IntTyWidthInBit);
    1356             :   }
    1357          58 :   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
    1358             : 
    1359          27 :   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    1360          27 :     return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
    1361             :   }
    1362             : 
    1363         542 :   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
    1364             : 
    1365         106 :   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1366             :                             Type *Ty) override {
    1367         106 :     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
    1368             :   }
    1369          84 :   int getIntImmCost(const APInt &Imm, Type *Ty) override {
    1370          84 :     return Impl.getIntImmCost(Imm, Ty);
    1371             :   }
    1372      465899 :   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    1373             :                     Type *Ty) override {
    1374      465964 :     return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
    1375             :   }
    1376      147636 :   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    1377             :                     Type *Ty) override {
    1378      147640 :     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
    1379             :   }
    1380      192080 :   unsigned getNumberOfRegisters(bool Vector) override {
    1381      192080 :     return Impl.getNumberOfRegisters(Vector);
    1382             :   }
    1383       10842 :   unsigned getRegisterBitWidth(bool Vector) const override {
    1384       10842 :     return Impl.getRegisterBitWidth(Vector);
    1385             :   }
    1386        9236 :   unsigned getMinVectorRegisterBitWidth() override {
    1387        9313 :     return Impl.getMinVectorRegisterBitWidth();
    1388             :   }
    1389         784 :   bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    1390         784 :     return Impl.shouldMaximizeVectorBandwidth(OptSize);
    1391             :   }
    1392           1 :   unsigned getMinimumVF(unsigned ElemWidth) const override {
    1393           1 :     return Impl.getMinimumVF(ElemWidth);
    1394             :   }
    1395       33838 :   bool shouldConsiderAddressTypePromotion(
    1396             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    1397             :     return Impl.shouldConsiderAddressTypePromotion(
    1398       33838 :         I, AllowPromotionWithoutCommonHeader);
    1399             :   }
    1400          51 :   unsigned getCacheLineSize() override {
    1401          51 :     return Impl.getCacheLineSize();
    1402             :   }
    1403           8 :   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
    1404           8 :     return Impl.getCacheSize(Level);
    1405             :   }
    1406          10 :   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
    1407          10 :     return Impl.getCacheAssociativity(Level);
    1408             :   }
    1409       20758 :   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
    1410          78 :   unsigned getMinPrefetchStride() override {
    1411          78 :     return Impl.getMinPrefetchStride();
    1412             :   }
    1413         226 :   unsigned getMaxPrefetchIterationsAhead() override {
    1414         226 :     return Impl.getMaxPrefetchIterationsAhead();
    1415             :   }
    1416        1904 :   unsigned getMaxInterleaveFactor(unsigned VF) override {
    1417        1937 :     return Impl.getMaxInterleaveFactor(VF);
    1418             :   }
    1419        1650 :   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
    1420             :                                             unsigned &JTSize) override {
    1421        1650 :     return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
    1422             :   }
    1423             :   unsigned
    1424      163228 :   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    1425             :                          OperandValueKind Opd2Info,
    1426             :                          OperandValueProperties Opd1PropInfo,
    1427             :                          OperandValueProperties Opd2PropInfo,
    1428             :                          ArrayRef<const Value *> Args) override {
    1429             :     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
    1430      163228 :                                        Opd1PropInfo, Opd2PropInfo, Args);
    1431             :   }
    1432        4148 :   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    1433             :                      Type *SubTp) override {
    1434        4148 :     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
    1435             :   }
    1436        6167 :   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    1437             :                        const Instruction *I) override {
    1438        6167 :     return Impl.getCastInstrCost(Opcode, Dst, Src, I);
    1439             :   }
    1440          32 :   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
    1441             :                                unsigned Index) override {
    1442          32 :     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
    1443             :   }
    1444       10383 :   int getCFInstrCost(unsigned Opcode) override {
    1445       10383 :     return Impl.getCFInstrCost(Opcode);
    1446             :   }
    1447        4590 :   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
    1448             :                          const Instruction *I) override {
    1449        4590 :     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
    1450             :   }
    1451       59694 :   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    1452       59694 :     return Impl.getVectorInstrCost(Opcode, Val, Index);
    1453             :   }
    1454      305122 :   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1455             :                       unsigned AddressSpace, const Instruction *I) override {
    1456      305122 :     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
    1457             :   }
    1458         126 :   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    1459             :                             unsigned AddressSpace) override {
    1460         126 :     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
    1461             :   }
    1462         104 :   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    1463             :                              Value *Ptr, bool VariableMask,
    1464             :                              unsigned Alignment) override {
    1465             :     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
    1466         104 :                                        Alignment);
    1467             :   }
    1468          70 :   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
    1469             :                                  ArrayRef<unsigned> Indices, unsigned Alignment,
    1470             :                                  unsigned AddressSpace) override {
    1471             :     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
    1472          70 :                                            Alignment, AddressSpace);
    1473             :   }
    1474         285 :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
    1475             :                                  bool IsPairwiseForm) override {
    1476         285 :     return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
    1477             :   }
    1478         936 :   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
    1479             :                              bool IsPairwiseForm, bool IsUnsigned) override {
    1480         936 :     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
    1481             :   }
    1482         955 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
    1483             :                FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    1484             :     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
    1485         955 :                                       ScalarizationCostPassed);
    1486             :   }
    1487        2855 :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    1488             :        ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    1489        2855 :     return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
    1490             :   }
    1491         554 :   int getCallInstrCost(Function *F, Type *RetTy,
    1492             :                        ArrayRef<Type *> Tys) override {
    1493         554 :     return Impl.getCallInstrCost(F, RetTy, Tys);
    1494             :   }
    1495       14867 :   unsigned getNumberOfParts(Type *Tp) override {
    1496       14867 :     return Impl.getNumberOfParts(Tp);
    1497             :   }
    1498        1267 :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
    1499             :                                 const SCEV *Ptr) override {
    1500        1267 :     return Impl.getAddressComputationCost(Ty, SE, Ptr);
    1501             :   }
    1502       12397 :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    1503       12397 :     return Impl.getCostOfKeepingLiveOverCall(Tys);
    1504             :   }
    1505      204300 :   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    1506             :                           MemIntrinsicInfo &Info) override {
    1507      204300 :     return Impl.getTgtMemIntrinsic(Inst, Info);
    1508             :   }
    1509           9 :   unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    1510           9 :     return Impl.getAtomicMemIntrinsicMaxElementSize();
    1511             :   }
    1512          29 :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    1513             :                                            Type *ExpectedType) override {
    1514          29 :     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
    1515             :   }
    1516          15 :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
    1517             :                                   unsigned SrcAlign,
    1518             :                                   unsigned DestAlign) const override {
    1519          15 :     return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
    1520             :   }
    1521           0 :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
    1522             :                                          LLVMContext &Context,
    1523             :                                          unsigned RemainingBytes,
    1524             :                                          unsigned SrcAlign,
    1525             :                                          unsigned DestAlign) const override {
    1526           0 :     Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
    1527             :                                            SrcAlign, DestAlign);
    1528           0 :   }
    1529      379581 :   bool areInlineCompatible(const Function *Caller,
    1530             :                            const Function *Callee) const override {
    1531      379581 :     return Impl.areInlineCompatible(Caller, Callee);
    1532             :   }
    1533        7617 :   bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    1534       15234 :     return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
    1535             :   }
    1536           0 :   bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    1537           0 :     return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
    1538             :   }
    1539       65512 :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    1540       65512 :     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
    1541             :   }
    1542       40679 :   bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    1543       40679 :     return Impl.isLegalToVectorizeLoad(LI);
    1544             :   }
    1545       16563 :   bool isLegalToVectorizeStore(StoreInst *SI) const override {
    1546       16563 :     return Impl.isLegalToVectorizeStore(SI);
    1547             :   }
    1548        8375 :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
    1549             :                                    unsigned Alignment,
    1550             :                                    unsigned AddrSpace) const override {
    1551             :     return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
    1552        8375 :                                             AddrSpace);
    1553             :   }
    1554         483 :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    1555             :                                     unsigned Alignment,
    1556             :                                     unsigned AddrSpace) const override {
    1557             :     return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
    1558         483 :                                              AddrSpace);
    1559             :   }
    1560       12883 :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
    1561             :                                unsigned ChainSizeInBytes,
    1562             :                                VectorType *VecTy) const override {
    1563       12883 :     return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
    1564             :   }
    1565        3884 :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
    1566             :                                 unsigned ChainSizeInBytes,
    1567             :                                 VectorType *VecTy) const override {
    1568        3884 :     return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
    1569             :   }
    1570         305 :   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
    1571             :                              ReductionFlags Flags) const override {
    1572         305 :     return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
    1573             :   }
    1574        1741 :   bool shouldExpandReduction(const IntrinsicInst *II) const override {
    1575        1741 :     return Impl.shouldExpandReduction(II);
    1576             :   }
    1577          11 :   int getInstructionLatency(const Instruction *I) override {
    1578          11 :     return Impl.getInstructionLatency(I);
    1579             :   }
    1580             : };
    1581             : 
    1582             : template <typename T>
    1583      632175 : TargetTransformInfo::TargetTransformInfo(T Impl)
    1584     3509415 :     : TTIImpl(new Model<T>(Impl)) {}
    1585             : 
    1586             : /// Analysis pass providing the \c TargetTransformInfo.
    1587             : ///
    1588             : /// The core idea of the TargetIRAnalysis is to expose an interface through
    1589             : /// which LLVM targets can analyze and provide information about the middle
    1590             : /// end's target-independent IR. This supports use cases such as target-aware
    1591             : /// cost modeling of IR constructs.
    1592             : ///
    1593             : /// This is a function analysis because much of the cost modeling for targets
    1594             : /// is done in a subtarget specific way and LLVM supports compiling different
    1595             : /// functions targeting different subtargets in order to support runtime
    1596             : /// dispatch according to the observed subtarget.
    1597       62716 : class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
    1598             : public:
    1599             :   typedef TargetTransformInfo Result;
    1600             : 
    1601             :   /// Default construct a target IR analysis.
    1602             :   ///
    1603             :   /// This will use the module's datalayout to construct a baseline
    1604             :   /// conservative TTI result.
    1605             :   TargetIRAnalysis();
    1606             : 
    1607             :   /// Construct an IR analysis pass around a target-provided callback.
    1608             :   ///
    1609             :   /// The callback will be called with a particular function for which the TTI
    1610             :   /// is needed and must return a TTI object for that function.
    1611             :   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
    1612             : 
    1613             :   // Value semantics. We spell out the constructors for MSVC.
    1614             :   TargetIRAnalysis(const TargetIRAnalysis &Arg)
    1615             :       : TTICallback(Arg.TTICallback) {}
    1616             :   TargetIRAnalysis(TargetIRAnalysis &&Arg)
    1617             :       : TTICallback(std::move(Arg.TTICallback)) {}
    1618             :   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    1619             :     TTICallback = RHS.TTICallback;
    1620             :     return *this;
    1621             :   }
    1622             :   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    1623             :     TTICallback = std::move(RHS.TTICallback);
    1624             :     return *this;
    1625             :   }
    1626             : 
    1627             :   Result run(const Function &F, FunctionAnalysisManager &);
    1628             : 
    1629             : private:
    1630             :   friend AnalysisInfoMixin<TargetIRAnalysis>;
    1631             :   static AnalysisKey Key;
    1632             : 
    1633             :   /// The callback used to produce a result.
    1634             :   ///
    1635             :   /// We use a completely opaque callback so that targets can provide whatever
    1636             :   /// mechanism they desire for constructing the TTI for a given function.
    1637             :   ///
    1638             :   /// FIXME: Should we really use std::function? It's relatively inefficient.
    1639             :   /// It might be possible to arrange for even stateful callbacks to outlive
    1640             :   /// the analysis and thus use a function_ref which would be lighter weight.
    1641             :   /// This may also be less error prone as the callback is likely to reference
    1642             :   /// the external TargetMachine, and that reference needs to never dangle.
    1643             :   std::function<Result(const Function &)> TTICallback;
    1644             : 
    1645             :   /// Helper function used as the callback in the default constructor.
    1646             :   static Result getDefaultTTI(const Function &F);
    1647             : };
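                     : 
                     : // Example (editor's sketch): with the new pass manager, the analysis is
                     : // registered and queried per function. Assumes a FunctionAnalysisManager
                     : // FAM and a Function &F.
                     : //
                     : //   FAM.registerPass([] { return TargetIRAnalysis(); });
                     : //   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);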
    1648             : 
    1649             : /// Wrapper pass for TargetTransformInfo.
    1650             : ///
    1651             : /// This pass can be constructed from a TTI object which it stores internally
    1652             : /// and is queried by passes.
    1653      188148 : class TargetTransformInfoWrapperPass : public ImmutablePass {
    1654             :   TargetIRAnalysis TIRA;
    1655             :   Optional<TargetTransformInfo> TTI;
    1656             : 
    1657             :   virtual void anchor();
    1658             : 
    1659             : public:
    1660             :   static char ID;
    1661             : 
    1662             :   /// We must provide a default constructor for the pass, but it should
    1663             :   /// never be used.
    1664             :   ///
    1665             :   /// Use the constructor below or call one of the creation routines.
    1666             :   TargetTransformInfoWrapperPass();
    1667             : 
    1668             :   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1669             : 
    1670             :   TargetTransformInfo &getTTI(const Function &F);
    1671             : };
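                     : 
                     : // Example (editor's sketch): under the legacy pass manager, a pass declares
                     : // a dependency on the wrapper and asks it for the current function's TTI.
                     : //
                     : //   void getAnalysisUsage(AnalysisUsage &AU) const override {
                     : //     AU.addRequired<TargetTransformInfoWrapperPass>();
                     : //   }
                     : //   bool runOnFunction(Function &F) override {
                     : //     TargetTransformInfo &TTI =
                     : //         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
                     : //     return false; // use TTI for cost queries here
                     : //   }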
    1672             : 
    1673             : /// Create an analysis pass wrapper around a TTI object.
    1674             : ///
    1675             : /// This analysis pass just holds the TTI instance and makes it available to
    1676             : /// clients.
    1677             : ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1678             : 
    1679             : } // End llvm namespace
    1680             : 
    1681             : #endif
