LCOV - code coverage report
Current view: top level - include/llvm/Analysis - TargetTransformInfo.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 209 231 90.5 %
Date: 2018-05-20 00:06:23 Functions: 590 1328 44.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : /// \file
      10             : /// This pass exposes codegen information to IR-level passes. Every
      11             : /// transformation that uses codegen information is broken into three parts:
      12             : /// 1. The IR-level analysis pass.
      13             : /// 2. The IR-level transformation interface which provides the needed
      14             : ///    information.
      15             : /// 3. Codegen-level implementation which uses target-specific hooks.
      16             : ///
      17             : /// This file defines #2, which is the interface that IR-level transformations
      18             : /// use for querying the codegen.
      19             : ///
      20             : //===----------------------------------------------------------------------===//
      21             : 
      22             : #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
      23             : #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
      24             : 
      25             : #include "llvm/ADT/Optional.h"
      26             : #include "llvm/IR/Operator.h"
      27             : #include "llvm/IR/PassManager.h"
      28             : #include "llvm/Pass.h"
      29             : #include "llvm/Support/AtomicOrdering.h"
      30             : #include "llvm/Support/DataTypes.h"
      31             : #include <functional>
      32             : 
      33             : namespace llvm {
      34             : 
      35             : namespace Intrinsic {
      36             : enum ID : unsigned;
      37             : }
      38             : 
      39             : class Function;
      40             : class GlobalValue;
      41             : class IntrinsicInst;
      42             : class LoadInst;
      43             : class Loop;
      44             : class SCEV;
      45             : class ScalarEvolution;
      46             : class StoreInst;
      47             : class SwitchInst;
      48             : class Type;
      49             : class User;
      50             : class Value;
      51             : 
      52             : /// Information about a load/store intrinsic defined by the target.
      53     2433277 : struct MemIntrinsicInfo {
      54             :   /// This is the pointer that the intrinsic is loading from or storing to.
      55             :   /// If this is non-null, then analysis/optimization passes can assume that
      56             :   /// this intrinsic is functionally equivalent to a load/store from this
      57             :   /// pointer.
      58             :   Value *PtrVal = nullptr;
      59             : 
      60             :   // Ordering for atomic operations.
      61             :   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
      62             : 
      63             :   // Same Id is set by the target for corresponding load/store intrinsics.
      64             :   unsigned short MatchingId = 0;
      65             : 
      66             :   bool ReadMem = false;
      67             :   bool WriteMem = false;
      68             :   bool IsVolatile = false;
      69             : 
      70             :   bool isUnordered() const {
      71         806 :     return (Ordering == AtomicOrdering::NotAtomic ||
      72         806 :             Ordering == AtomicOrdering::Unordered) && !IsVolatile;
      73             :   }
      74             : };
      75             : 
      76             : /// This pass provides access to the codegen interfaces that are needed
      77             : /// for IR-level transformations.
      78             : class TargetTransformInfo {
      79             : public:
      80             :   /// Construct a TTI object using a type implementing the \c Concept
      81             :   /// API below.
      82             :   ///
      83             :   /// This is used by targets to construct a TTI wrapping their target-specific
       84             :   /// implementation that encodes appropriate costs for their target.
      85             :   template <typename T> TargetTransformInfo(T Impl);
      86             : 
      87             :   /// Construct a baseline TTI object using a minimal implementation of
      88             :   /// the \c Concept API below.
      89             :   ///
      90             :   /// The TTI implementation will reflect the information in the DataLayout
      91             :   /// provided if non-null.
      92             :   explicit TargetTransformInfo(const DataLayout &DL);
      93             : 
      94             :   // Provide move semantics.
      95             :   TargetTransformInfo(TargetTransformInfo &&Arg);
      96             :   TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
      97             : 
      98             :   // We need to define the destructor out-of-line to define our sub-classes
      99             :   // out-of-line.
     100             :   ~TargetTransformInfo();
     101             : 
     102             :   /// Handle the invalidation of this information.
     103             :   ///
     104             :   /// When used as a result of \c TargetIRAnalysis this method will be called
     105             :   /// when the function this was computed for changes. When it returns false,
     106             :   /// the information is preserved across those changes.
     107             :   bool invalidate(Function &, const PreservedAnalyses &,
     108             :                   FunctionAnalysisManager::Invalidator &) {
     109             :     // FIXME: We should probably in some way ensure that the subtarget
     110             :     // information for a function hasn't changed.
     111             :     return false;
     112             :   }
     113             : 
     114             :   /// \name Generic Target Information
     115             :   /// @{
     116             : 
     117             :   /// The kind of cost model.
     118             :   ///
     119             :   /// There are several different cost models that can be customized by the
     120             :   /// target. The normalization of each cost model may be target specific.
     121             :   enum TargetCostKind {
     122             :     TCK_RecipThroughput, ///< Reciprocal throughput.
     123             :     TCK_Latency,         ///< The latency of instruction.
     124             :     TCK_CodeSize         ///< Instruction code size.
     125             :   };
     126             : 
     127             :   /// Query the cost of a specified instruction.
     128             :   ///
     129             :   /// Clients should use this interface to query the cost of an existing
     130             :   /// instruction. The instruction must have a valid parent (basic block).
     131             :   ///
     132             :   /// Note, this method does not cache the cost calculation and it
     133             :   /// can be expensive in some cases.
     134       26298 :   int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
     135       26298 :     switch (kind){
     136       26276 :     case TCK_RecipThroughput:
     137       26276 :       return getInstructionThroughput(I);
     138             : 
     139          11 :     case TCK_Latency:
     140          11 :       return getInstructionLatency(I);
     141             : 
     142          11 :     case TCK_CodeSize:
     143        1592 :       return getUserCost(I);
     144             :     }
     145           0 :     llvm_unreachable("Unknown instruction cost kind");
     146             :   }
     147             : 
     148             :   /// Underlying constants for 'cost' values in this interface.
     149             :   ///
     150             :   /// Many APIs in this interface return a cost. This enum defines the
     151             :   /// fundamental values that should be used to interpret (and produce) those
     152             :   /// costs. The costs are returned as an int rather than a member of this
     153             :   /// enumeration because it is expected that the cost of one IR instruction
     154             :   /// may have a multiplicative factor to it or otherwise won't fit directly
     155             :   /// into the enum. Moreover, it is common to sum or average costs which works
     156             :   /// better as simple integral values. Thus this enum only provides constants.
     157             :   /// Also note that the returned costs are signed integers to make it natural
     158             :   /// to add, subtract, and test with zero (a common boundary condition). It is
     159             :   /// not expected that 2^32 is a realistic cost to be modeling at any point.
     160             :   ///
     161             :   /// Note that these costs should usually reflect the intersection of code-size
     162             :   /// cost and execution cost. A free instruction is typically one that folds
     163             :   /// into another instruction. For example, reg-to-reg moves can often be
     164             :   /// skipped by renaming the registers in the CPU, but they still are encoded
     165             :   /// and thus wouldn't be considered 'free' here.
     166             :   enum TargetCostConstants {
     167             :     TCC_Free = 0,     ///< Expected to fold away in lowering.
     168             :     TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
     169             :     TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
     170             :   };
     171             : 
     172             :   /// Estimate the cost of a specific operation when lowered.
     173             :   ///
     174             :   /// Note that this is designed to work on an arbitrary synthetic opcode, and
     175             :   /// thus work for hypothetical queries before an instruction has even been
     176             :   /// formed. However, this does *not* work for GEPs, and must not be called
     177             :   /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
     178             :   /// analyzing a GEP's cost required more information.
     179             :   ///
     180             :   /// Typically only the result type is required, and the operand type can be
     181             :   /// omitted. However, if the opcode is one of the cast instructions, the
     182             :   /// operand type is required.
     183             :   ///
     184             :   /// The returned cost is defined in terms of \c TargetCostConstants, see its
     185             :   /// comments for a detailed explanation of the cost values.
     186             :   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
     187             : 
     188             :   /// Estimate the cost of a GEP operation when lowered.
     189             :   ///
     190             :   /// The contract for this function is the same as \c getOperationCost except
     191             :   /// that it supports an interface that provides extra information specific to
     192             :   /// the GEP operation.
     193             :   int getGEPCost(Type *PointeeType, const Value *Ptr,
     194             :                  ArrayRef<const Value *> Operands) const;
     195             : 
     196             :   /// Estimate the cost of a EXT operation when lowered.
     197             :   ///
     198             :   /// The contract for this function is the same as \c getOperationCost except
     199             :   /// that it supports an interface that provides extra information specific to
     200             :   /// the EXT operation.
     201             :   int getExtCost(const Instruction *I, const Value *Src) const;
     202             : 
     203             :   /// Estimate the cost of a function call when lowered.
     204             :   ///
     205             :   /// The contract for this is the same as \c getOperationCost except that it
     206             :   /// supports an interface that provides extra information specific to call
     207             :   /// instructions.
     208             :   ///
     209             :   /// This is the most basic query for estimating call cost: it only knows the
     210             :   /// function type and (potentially) the number of arguments at the call site.
     211             :   /// The latter is only interesting for varargs function types.
     212             :   int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
     213             : 
     214             :   /// Estimate the cost of calling a specific function when lowered.
     215             :   ///
     216             :   /// This overload adds the ability to reason about the particular function
     217             :   /// being called in the event it is a library call with special lowering.
     218             :   int getCallCost(const Function *F, int NumArgs = -1) const;
     219             : 
     220             :   /// Estimate the cost of calling a specific function when lowered.
     221             :   ///
     222             :   /// This overload allows specifying a set of candidate argument values.
     223             :   int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
     224             : 
     225             :   /// \returns A value by which our inlining threshold should be multiplied.
     226             :   /// This is primarily used to bump up the inlining threshold wholesale on
     227             :   /// targets where calls are unusually expensive.
     228             :   ///
     229             :   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
     230             :   /// individual classes of instructions would be better.
     231             :   unsigned getInliningThresholdMultiplier() const;
     232             : 
     233             :   /// Estimate the cost of an intrinsic when lowered.
     234             :   ///
     235             :   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
     236             :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     237             :                        ArrayRef<Type *> ParamTys) const;
     238             : 
     239             :   /// Estimate the cost of an intrinsic when lowered.
     240             :   ///
     241             :   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
     242             :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
     243             :                        ArrayRef<const Value *> Arguments) const;
     244             : 
     245             :   /// \return The estimated number of case clusters when lowering \p 'SI'.
     246             :   /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
     247             :   /// table.
     248             :   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
     249             :                                             unsigned &JTSize) const;
     250             : 
     251             :   /// Estimate the cost of a given IR user when lowered.
     252             :   ///
     253             :   /// This can estimate the cost of either a ConstantExpr or Instruction when
     254             :   /// lowered. It has two primary advantages over the \c getOperationCost and
     255             :   /// \c getGEPCost above, and one significant disadvantage: it can only be
     256             :   /// used when the IR construct has already been formed.
     257             :   ///
     258             :   /// The advantages are that it can inspect the SSA use graph to reason more
     259             :   /// accurately about the cost. For example, all-constant-GEPs can often be
     260             :   /// folded into a load or other instruction, but if they are used in some
     261             :   /// other context they may not be folded. This routine can distinguish such
     262             :   /// cases.
     263             :   ///
     264             :   /// \p Operands is a list of operands which can be a result of transformations
     265             :   /// of the current operands. The number of the operands on the list must equal
     266             :   /// to the number of the current operands the IR user has. Their order on the
     267             :   /// list must be the same as the order of the current operands the IR user
     268             :   /// has.
     269             :   ///
     270             :   /// The returned cost is defined in terms of \c TargetCostConstants, see its
     271             :   /// comments for a detailed explanation of the cost values.
     272             :   int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
     273             : 
     274             :   /// This is a helper function which calls the two-argument getUserCost
     275             :   /// with \p Operands which are the current operands U has.
     276     3328676 :   int getUserCost(const User *U) const {
     277             :     SmallVector<const Value *, 4> Operands(U->value_op_begin(),
     278             :                                            U->value_op_end());
     279     6657352 :     return getUserCost(U, Operands);
     280             :   }
     281             : 
     282             :   /// Return true if branch divergence exists.
     283             :   ///
     284             :   /// Branch divergence has a significantly negative impact on GPU performance
     285             :   /// when threads in the same wavefront take different paths due to conditional
     286             :   /// branches.
     287             :   bool hasBranchDivergence() const;
     288             : 
     289             :   /// Returns whether V is a source of divergence.
     290             :   ///
     291             :   /// This function provides the target-dependent information for
     292             :   /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
     293             :   /// builds the dependency graph, and then runs the reachability algorithm
     294             :   /// starting with the sources of divergence.
     295             :   bool isSourceOfDivergence(const Value *V) const;
     296             : 
     297             :   // Returns true for the target specific
     298             :   // set of operations which produce uniform result
      299             :   // even taking non-uniform arguments
     300             :   bool isAlwaysUniform(const Value *V) const;
     301             : 
     302             :   /// Returns the address space ID for a target's 'flat' address space. Note
     303             :   /// this is not necessarily the same as addrspace(0), which LLVM sometimes
     304             :   /// refers to as the generic address space. The flat address space is a
      305             :   /// generic address space that can be used to access multiple segments of memory
     306             :   /// with different address spaces. Access of a memory location through a
     307             :   /// pointer with this address space is expected to be legal but slower
     308             :   /// compared to the same memory location accessed through a pointer with a
     309             :   /// different address space.
     310             :   //
     311             :   /// This is for targets with different pointer representations which can
     312             :   /// be converted with the addrspacecast instruction. If a pointer is converted
     313             :   /// to this address space, optimizations should attempt to replace the access
     314             :   /// with the source address space.
     315             :   ///
     316             :   /// \returns ~0u if the target does not have such a flat address space to
     317             :   /// optimize away.
     318             :   unsigned getFlatAddressSpace() const;
     319             : 
     320             :   /// Test whether calls to a function lower to actual program function
     321             :   /// calls.
     322             :   ///
     323             :   /// The idea is to test whether the program is likely to require a 'call'
     324             :   /// instruction or equivalent in order to call the given function.
     325             :   ///
     326             :   /// FIXME: It's not clear that this is a good or useful query API. Client's
     327             :   /// should probably move to simpler cost metrics using the above.
     328             :   /// Alternatively, we could split the cost interface into distinct code-size
     329             :   /// and execution-speed costs. This would allow modelling the core of this
     330             :   /// query more accurately as a call is a single small instruction, but
     331             :   /// incurs significant execution cost.
     332             :   bool isLoweredToCall(const Function *F) const;
     333             : 
     334             :   struct LSRCost {
     335             :     /// TODO: Some of these could be merged. Also, a lexical ordering
     336             :     /// isn't always optimal.
     337             :     unsigned Insns;
     338             :     unsigned NumRegs;
     339             :     unsigned AddRecCost;
     340             :     unsigned NumIVMuls;
     341             :     unsigned NumBaseAdds;
     342             :     unsigned ImmCost;
     343             :     unsigned SetupCost;
     344             :     unsigned ScaleCost;
     345             :   };
     346             : 
     347             :   /// Parameters that control the generic loop unrolling transformation.
     348             :   struct UnrollingPreferences {
     349             :     /// The cost threshold for the unrolled loop. Should be relative to the
     350             :     /// getUserCost values returned by this API, and the expectation is that
     351             :     /// the unrolled loop's instructions when run through that interface should
     352             :     /// not exceed this cost. However, this is only an estimate. Also, specific
     353             :     /// loops may be unrolled even with a cost above this threshold if deemed
     354             :     /// profitable. Set this to UINT_MAX to disable the loop body cost
     355             :     /// restriction.
     356             :     unsigned Threshold;
     357             :     /// If complete unrolling will reduce the cost of the loop, we will boost
     358             :     /// the Threshold by a certain percent to allow more aggressive complete
     359             :     /// unrolling. This value provides the maximum boost percentage that we
     360             :     /// can apply to Threshold (The value should be no less than 100).
     361             :     /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
     362             :     ///                                    MaxPercentThresholdBoost / 100)
     363             :     /// E.g. if complete unrolling reduces the loop execution time by 50%
     364             :     /// then we boost the threshold by the factor of 2x. If unrolling is not
     365             :     /// expected to reduce the running time, then we do not increase the
     366             :     /// threshold.
     367             :     unsigned MaxPercentThresholdBoost;
     368             :     /// The cost threshold for the unrolled loop when optimizing for size (set
     369             :     /// to UINT_MAX to disable).
     370             :     unsigned OptSizeThreshold;
     371             :     /// The cost threshold for the unrolled loop, like Threshold, but used
     372             :     /// for partial/runtime unrolling (set to UINT_MAX to disable).
     373             :     unsigned PartialThreshold;
     374             :     /// The cost threshold for the unrolled loop when optimizing for size, like
     375             :     /// OptSizeThreshold, but used for partial/runtime unrolling (set to
     376             :     /// UINT_MAX to disable).
     377             :     unsigned PartialOptSizeThreshold;
     378             :     /// A forced unrolling factor (the number of concatenated bodies of the
     379             :     /// original loop in the unrolled loop body). When set to 0, the unrolling
     380             :     /// transformation will select an unrolling factor based on the current cost
     381             :     /// threshold and other factors.
     382             :     unsigned Count;
      383             :     /// A forced peeling factor (the number of bodies of the original loop
     384             :     /// that should be peeled off before the loop body). When set to 0, the
     385             :     /// unrolling transformation will select a peeling factor based on profile
     386             :     /// information and other factors.
     387             :     unsigned PeelCount;
     388             :     /// Default unroll count for loops with run-time trip count.
     389             :     unsigned DefaultUnrollRuntimeCount;
     390             :     // Set the maximum unrolling factor. The unrolling factor may be selected
     391             :     // using the appropriate cost threshold, but may not exceed this number
     392             :     // (set to UINT_MAX to disable). This does not apply in cases where the
     393             :     // loop is being fully unrolled.
     394             :     unsigned MaxCount;
     395             :     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
     396             :     /// applies even if full unrolling is selected. This allows a target to fall
     397             :     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
     398             :     unsigned FullUnrollMaxCount;
     399             :     // Represents number of instructions optimized when "back edge"
     400             :     // becomes "fall through" in unrolled loop.
     401             :     // For now we count a conditional branch on a backedge and a comparison
     402             :     // feeding it.
     403             :     unsigned BEInsns;
     404             :     /// Allow partial unrolling (unrolling of loops to expand the size of the
     405             :     /// loop body, not only to eliminate small constant-trip-count loops).
     406             :     bool Partial;
     407             :     /// Allow runtime unrolling (unrolling of loops to expand the size of the
     408             :     /// loop body even when the number of loop iterations is not known at
     409             :     /// compile time).
     410             :     bool Runtime;
     411             :     /// Allow generation of a loop remainder (extra iterations after unroll).
     412             :     bool AllowRemainder;
     413             :     /// Allow emitting expensive instructions (such as divisions) when computing
     414             :     /// the trip count of a loop for runtime unrolling.
     415             :     bool AllowExpensiveTripCount;
     416             :     /// Apply loop unroll on any kind of loop
     417             :     /// (mainly to loops that fail runtime unrolling).
     418             :     bool Force;
     419             :     /// Allow using trip count upper bound to unroll loops.
     420             :     bool UpperBound;
     421             :     /// Allow peeling off loop iterations for loops with low dynamic tripcount.
     422             :     bool AllowPeeling;
     423             :     /// Allow unrolling of all the iterations of the runtime loop remainder.
     424             :     bool UnrollRemainder;
     425             :   };
     426             : 
     427             :   /// Get target-customized preferences for the generic loop unrolling
     428             :   /// transformation. The caller will initialize UP with the current
     429             :   /// target-independent defaults.
     430             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &,
     431             :                                UnrollingPreferences &UP) const;
     432             : 
     433             :   /// @}
     434             : 
     435             :   /// \name Scalar Target Information
     436             :   /// @{
     437             : 
     438             :   /// Flags indicating the kind of support for population count.
     439             :   ///
     440             :   /// Compared to the SW implementation, HW support is supposed to
     441             :   /// significantly boost the performance when the population is dense, and it
     442             :   /// may or may not degrade performance if the population is sparse. A HW
     443             :   /// support is considered as "Fast" if it can outperform, or is on a par
     444             :   /// with, SW implementation when the population is sparse; otherwise, it is
     445             :   /// considered as "Slow".
     446             :   enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
     447             : 
     448             :   /// Return true if the specified immediate is legal add immediate, that
     449             :   /// is the target has add instructions which can add a register with the
     450             :   /// immediate without having to materialize the immediate into a register.
     451             :   bool isLegalAddImmediate(int64_t Imm) const;
     452             : 
     453             :   /// Return true if the specified immediate is legal icmp immediate,
     454             :   /// that is the target has icmp instructions which can compare a register
     455             :   /// against the immediate without having to materialize the immediate into a
     456             :   /// register.
     457             :   bool isLegalICmpImmediate(int64_t Imm) const;
     458             : 
     459             :   /// Return true if the addressing mode represented by AM is legal for
     460             :   /// this target, for a load/store of the specified type.
     461             :   /// The type may be VoidTy, in which case only return true if the addressing
     462             :   /// mode is legal for a load/store of any legal type.
     463             :   /// If target returns true in LSRWithInstrQueries(), I may be valid.
     464             :   /// TODO: Handle pre/postinc as well.
     465             :   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
     466             :                              bool HasBaseReg, int64_t Scale,
     467             :                              unsigned AddrSpace = 0,
     468             :                              Instruction *I = nullptr) const;
     469             : 
      470             :   /// Return true if LSR cost of C1 is lower than C2.
     471             :   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
     472             :                      TargetTransformInfo::LSRCost &C2) const;
     473             : 
     474             :   /// Return true if the target can fuse a compare and branch.
     475             :   /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
     476             :   /// calculation for the instructions in a loop.
     477             :   bool canMacroFuseCmp() const;
     478             : 
      479             :   /// \return True if LSR should make efforts to create/preserve post-inc
     480             :   /// addressing mode expressions.
     481             :   bool shouldFavorPostInc() const;
     482             : 
      483             :   /// Return true if the target supports masked load/store.
      484             :   /// AVX2 and AVX-512 targets allow masks for consecutive load and store
     485             :   bool isLegalMaskedStore(Type *DataType) const;
     486             :   bool isLegalMaskedLoad(Type *DataType) const;
     487             : 
      488             :   /// Return true if the target supports masked gather/scatter.
     489             :   /// AVX-512 fully supports gather and scatter for vectors with 32 and 64
     490             :   /// bits scalar type.
     491             :   bool isLegalMaskedScatter(Type *DataType) const;
     492             :   bool isLegalMaskedGather(Type *DataType) const;
     493             : 
     494             :   /// Return true if the target has a unified operation to calculate division
     495             :   /// and remainder. If so, the additional implicit multiplication and
     496             :   /// subtraction required to calculate a remainder from division are free. This
     497             :   /// can enable more aggressive transformations for division and remainder than
     498             :   /// would typically be allowed using throughput or size cost models.
     499             :   bool hasDivRemOp(Type *DataType, bool IsSigned) const;
     500             : 
     501             :   /// Return true if the given instruction (assumed to be a memory access
     502             :   /// instruction) has a volatile variant. If that's the case then we can avoid
     503             :   /// addrspacecast to generic AS for volatile loads/stores. Default
     504             :   /// implementation returns false, which prevents address space inference for
     505             :   /// volatile loads/stores.
     506             :   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
     507             : 
     508             :   /// Return true if target doesn't mind addresses in vectors.
     509             :   bool prefersVectorizedAddressing() const;
     510             : 
     511             :   /// Return the cost of the scaling factor used in the addressing
     512             :   /// mode represented by AM for this target, for a load/store
     513             :   /// of the specified type.
     514             :   /// If the AM is supported, the return value must be >= 0.
     515             :   /// If the AM is not supported, it returns a negative value.
     516             :   /// TODO: Handle pre/postinc as well.
     517             :   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
     518             :                            bool HasBaseReg, int64_t Scale,
     519             :                            unsigned AddrSpace = 0) const;
     520             : 
     521             :   /// Return true if the loop strength reduce pass should make
     522             :   /// Instruction* based TTI queries to isLegalAddressingMode(). This is
     523             :   /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
     524             :   /// immediate offset and no index register.
     525             :   bool LSRWithInstrQueries() const;
     526             : 
     527             :   /// Return true if it's free to truncate a value of type Ty1 to type
     528             :   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
     529             :   /// by referencing its sub-register AX.
     530             :   bool isTruncateFree(Type *Ty1, Type *Ty2) const;
     531             : 
     532             :   /// Return true if it is profitable to hoist instruction in the
     533             :   /// then/else to before if.
     534             :   bool isProfitableToHoist(Instruction *I) const;
     535             : 
     536             :   bool useAA() const;
     537             : 
     538             :   /// Return true if this type is legal.
     539             :   bool isTypeLegal(Type *Ty) const;
     540             : 
     541             :   /// Returns the target's jmp_buf alignment in bytes.
     542             :   unsigned getJumpBufAlignment() const;
     543             : 
     544             :   /// Returns the target's jmp_buf size in bytes.
     545             :   unsigned getJumpBufSize() const;
     546             : 
     547             :   /// Return true if switches should be turned into lookup tables for the
     548             :   /// target.
     549             :   bool shouldBuildLookupTables() const;
     550             : 
     551             :   /// Return true if switches should be turned into lookup tables
     552             :   /// containing this constant value for the target.
     553             :   bool shouldBuildLookupTablesForConstant(Constant *C) const;
     554             : 
      555             :   /// Return true if the input function, which is cold at all call sites,
      556             :   /// should use the coldcc calling convention.
     557             :   bool useColdCCForColdCall(Function &F) const;
     558             : 
     559             :   unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
     560             : 
     561             :   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
     562             :                                             unsigned VF) const;
     563             : 
     564             :   /// If target has efficient vector element load/store instructions, it can
     565             :   /// return true here so that insertion/extraction costs are not added to
     566             :   /// the scalarization cost of a load/store.
     567             :   bool supportsEfficientVectorElementLoadStore() const;
     568             : 
     569             :   /// Don't restrict interleaved unrolling to small loops.
     570             :   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
     571             : 
     572             :   /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
     573             :   /// true if this is the expansion of memcmp(p1, p2, s) == 0.
     574       30676 :   struct MemCmpExpansionOptions {
     575             :     // The list of available load sizes (in bytes), sorted in decreasing order.
     576             :     SmallVector<unsigned, 8> LoadSizes;
     577             :   };
     578             :   const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
     579             : 
     580             :   /// Enable matching of interleaved access groups.
     581             :   bool enableInterleavedAccessVectorization() const;
     582             : 
     583             :   /// Indicate that it is potentially unsafe to automatically vectorize
     584             :   /// floating-point operations because the semantics of vector and scalar
     585             :   /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
     586             :   /// does not support IEEE-754 denormal numbers, while depending on the
     587             :   /// platform, scalar floating-point math does.
     588             :   /// This applies to floating-point math operations and calls, not memory
     589             :   /// operations, shuffles, or casts.
     590             :   bool isFPVectorizationPotentiallyUnsafe() const;
     591             : 
     592             :   /// Determine if the target supports unaligned memory accesses.
     593             :   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
     594             :                                       unsigned BitWidth, unsigned AddressSpace = 0,
     595             :                                       unsigned Alignment = 1,
     596             :                                       bool *Fast = nullptr) const;
     597             : 
     598             :   /// Return hardware support for population count.
     599             :   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
     600             : 
     601             :   /// Return true if the hardware has a fast square-root instruction.
     602             :   bool haveFastSqrt(Type *Ty) const;
     603             : 
     604             :   /// Return true if it is faster to check if a floating-point value is NaN
     605             :   /// (or not-NaN) versus a comparison against a constant FP zero value.
     606             :   /// Targets should override this if materializing a 0.0 for comparison is
     607             :   /// generally as cheap as checking for ordered/unordered.
     608             :   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
     609             : 
     610             :   /// Return the expected cost of supporting the floating point operation
     611             :   /// of the specified type.
     612             :   int getFPOpCost(Type *Ty) const;
     613             : 
     614             :   /// Return the expected cost of materializing for the given integer
     615             :   /// immediate of the specified type.
     616             :   int getIntImmCost(const APInt &Imm, Type *Ty) const;
     617             : 
     618             :   /// Return the expected cost of materialization for the given integer
     619             :   /// immediate of the specified type for a given instruction. The cost can be
     620             :   /// zero if the immediate can be folded into the specified instruction.
     621             :   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
     622             :                     Type *Ty) const;
     623             :   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
     624             :                     Type *Ty) const;
     625             : 
     626             :   /// Return the expected cost for the given integer when optimising
     627             :   /// for size. This is different than the other integer immediate cost
     628             :   /// functions in that it is subtarget agnostic. This is useful when you e.g.
      629             :   /// target one ISA such as AArch32 but smaller encodings could be possible
     630             :   /// with another such as Thumb. This return value is used as a penalty when
     631             :   /// the total costs for a constant is calculated (the bigger the cost, the
     632             :   /// more beneficial constant hoisting is).
     633             :   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
     634             :                             Type *Ty) const;
     635             :   /// @}
     636             : 
     637             :   /// \name Vector Target Information
     638             :   /// @{
     639             : 
     640             :   /// The various kinds of shuffle patterns for vector queries.
     641             :   enum ShuffleKind {
     642             :     SK_Broadcast,       ///< Broadcast element 0 to all other elements.
     643             :     SK_Reverse,         ///< Reverse the order of the vector.
     644             :     SK_Alternate,       ///< Choose alternate elements from vector.
     645             :     SK_Transpose,       ///< Transpose two vectors.
     646             :     SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
      647             :     SK_ExtractSubvector,///< ExtractSubvector. Index indicates start offset.
     648             :     SK_PermuteTwoSrc,   ///< Merge elements from two source vectors into one
     649             :                         ///< with any shuffle mask.
     650             :     SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
     651             :                         ///< shuffle mask.
     652             :   };
     653             : 
     654             :   /// Additional information about an operand's possible values.
     655             :   enum OperandValueKind {
     656             :     OK_AnyValue,               // Operand can have any value.
     657             :     OK_UniformValue,           // Operand is uniform (splat of a value).
     658             :     OK_UniformConstantValue,   // Operand is uniform constant.
     659             :     OK_NonUniformConstantValue // Operand is a non uniform constant value.
     660             :   };
     661             : 
     662             :   /// Additional properties of an operand's values.
     663             :   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
     664             : 
     665             :   /// \return The number of scalar or vector registers that the target has.
     666             :   /// If 'Vectors' is true, it returns the number of vector registers. If it is
     667             :   /// set to false, it returns the number of scalar registers.
     668             :   unsigned getNumberOfRegisters(bool Vector) const;
     669             : 
     670             :   /// \return The width of the largest scalar or vector register type.
     671             :   unsigned getRegisterBitWidth(bool Vector) const;
     672             : 
     673             :   /// \return The width of the smallest vector register type.
     674             :   unsigned getMinVectorRegisterBitWidth() const;
     675             : 
     676             :   /// \return True if the vectorization factor should be chosen to
     677             :   /// make the vector of the smallest element type match the size of a
     678             :   /// vector register. For wider element types, this could result in
     679             :   /// creating vectors that span multiple vector registers.
     680             :   /// If false, the vectorization factor will be chosen based on the
     681             :   /// size of the widest element type.
     682             :   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
     683             : 
     684             :   /// \return The minimum vectorization factor for types of given element
      685             :   /// bit width, or 0 if there is no minimum VF. The returned value only
     686             :   /// applies when shouldMaximizeVectorBandwidth returns true.
     687             :   unsigned getMinimumVF(unsigned ElemWidth) const;
     688             : 
     689             :   /// \return True if it should be considered for address type promotion.
     690             :   /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
     691             :   /// profitable without finding other extensions fed by the same input.
     692             :   bool shouldConsiderAddressTypePromotion(
     693             :       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
     694             : 
     695             :   /// \return The size of a cache line in bytes.
     696             :   unsigned getCacheLineSize() const;
     697             : 
     698             :   /// The possible cache levels
     699             :   enum class CacheLevel {
     700             :     L1D,   // The L1 data cache
     701             :     L2D,   // The L2 data cache
     702             : 
     703             :     // We currently do not model L3 caches, as their sizes differ widely between
     704             :     // microarchitectures. Also, we currently do not have a use for L3 cache
     705             :     // size modeling yet.
     706             :   };
     707             : 
     708             :   /// \return The size of the cache level in bytes, if available.
     709             :   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
     710             : 
     711             :   /// \return The associativity of the cache level, if available.
     712             :   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
     713             : 
     714             :   /// \return How much before a load we should place the prefetch instruction.
     715             :   /// This is currently measured in number of instructions.
     716             :   unsigned getPrefetchDistance() const;
     717             : 
     718             :   /// \return Some HW prefetchers can handle accesses up to a certain constant
     719             :   /// stride.  This is the minimum stride in bytes where it makes sense to start
     720             :   /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
     721             :   unsigned getMinPrefetchStride() const;
     722             : 
     723             :   /// \return The maximum number of iterations to prefetch ahead.  If the
     724             :   /// required number of iterations is more than this number, no prefetching is
     725             :   /// performed.
     726             :   unsigned getMaxPrefetchIterationsAhead() const;
     727             : 
     728             :   /// \return The maximum interleave factor that any transform should try to
     729             :   /// perform for this target. This number depends on the level of parallelism
     730             :   /// and the number of execution units in the CPU.
     731             :   unsigned getMaxInterleaveFactor(unsigned VF) const;
     732             : 
     733             :   /// This is an approximation of reciprocal throughput of a math/logic op.
     734             :   /// A higher cost indicates less expected throughput.
     735             :   /// From Agner Fog's guides, reciprocal throughput is "the average number of
     736             :   /// clock cycles per instruction when the instructions are not part of a
     737             :   /// limiting dependency chain."
     738             :   /// Therefore, costs should be scaled to account for multiple execution units
     739             :   /// on the target that can process this type of instruction. For example, if
     740             :   /// there are 5 scalar integer units and 2 vector integer units that can
     741             :   /// calculate an 'add' in a single cycle, this model should indicate that the
     742             :   /// cost of the vector add instruction is 2.5 times the cost of the scalar
     743             :   /// add instruction.
     744             :   /// \p Args is an optional argument which holds the instruction operands
     745             :   /// values so the TTI can analyze those values searching for special
     746             :   /// cases or optimizations based on those values.
     747             :   int getArithmeticInstrCost(
     748             :       unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
     749             :       OperandValueKind Opd2Info = OK_AnyValue,
     750             :       OperandValueProperties Opd1PropInfo = OP_None,
     751             :       OperandValueProperties Opd2PropInfo = OP_None,
     752             :       ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
     753             : 
     754             :   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
     755             :   /// The index and subtype parameters are used by the subvector insertion and
     756             :   /// extraction shuffle kinds.
     757             :   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
     758             :                      Type *SubTp = nullptr) const;
     759             : 
     760             :   /// \return The expected cost of cast instructions, such as bitcast, trunc,
     761             :   /// zext, etc. If there is an existing instruction that holds Opcode, it
     762             :   /// may be passed in the 'I' parameter.
     763             :   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     764             :                        const Instruction *I = nullptr) const;
     765             : 
     766             :   /// \return The expected cost of a sign- or zero-extended vector extract. Use
     767             :   /// -1 to indicate that there is no information about the index value.
     768             :   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
     769             :                                unsigned Index = -1) const;
     770             : 
     771             :   /// \return The expected cost of control-flow related instructions such as
     772             :   /// Phi, Ret, Br.
     773             :   int getCFInstrCost(unsigned Opcode) const;
     774             : 
     775             :   /// \returns The expected cost of compare and select instructions. If there
     776             :   /// is an existing instruction that holds Opcode, it may be passed in the
     777             :   /// 'I' parameter.
     778             :   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     779             :                  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
     780             : 
     781             :   /// \return The expected cost of vector Insert and Extract.
     782             :   /// Use -1 to indicate that there is no information on the index value.
     783             :   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
     784             : 
     785             :   /// \return The cost of Load and Store instructions.
     786             :   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
     787             :                       unsigned AddressSpace, const Instruction *I = nullptr) const;
     788             : 
     789             :   /// \return The cost of masked Load and Store instructions.
     790             :   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
     791             :                             unsigned AddressSpace) const;
     792             : 
     793             :   /// \return The cost of Gather or Scatter operation
     794             :   /// \p Opcode - is a type of memory access Load or Store
     795             :   /// \p DataTy - a vector type of the data to be loaded or stored
     796             :   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
     797             :   /// \p VariableMask - true when the memory access is predicated with a mask
     798             :   ///                   that is not a compile-time constant
     799             :   /// \p Alignment - alignment of single element
     800             :   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
     801             :                              bool VariableMask, unsigned Alignment) const;
     802             : 
     803             :   /// \return The cost of the interleaved memory operation.
     804             :   /// \p Opcode is the memory operation code
     805             :   /// \p VecTy is the vector type of the interleaved access.
     806             :   /// \p Factor is the interleave factor
     807             :   /// \p Indices is the indices for interleaved load members (as interleaved
     808             :   ///    load allows gaps)
     809             :   /// \p Alignment is the alignment of the memory operation
     810             :   /// \p AddressSpace is address space of the pointer.
     811             :   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
     812             :                                  ArrayRef<unsigned> Indices, unsigned Alignment,
     813             :                                  unsigned AddressSpace) const;
     814             : 
     815             :   /// Calculate the cost of performing a vector reduction.
     816             :   ///
     817             :   /// This is the cost of reducing the vector value of type \p Ty to a scalar
     818             :   /// value using the operation denoted by \p Opcode. The form of the reduction
     819             :   /// can either be a pairwise reduction or a reduction that splits the vector
     820             :   /// at every reduction level.
     821             :   ///
     822             :   /// Pairwise:
     823             :   ///  (v0, v1, v2, v3)
     824             :   ///  ((v0+v1), (v2+v3), undef, undef)
     825             :   /// Split:
     826             :   ///  (v0, v1, v2, v3)
     827             :   ///  ((v0+v2), (v1+v3), undef, undef)
     828             :   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
     829             :                                  bool IsPairwiseForm) const;
     830             :   int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
     831             :                              bool IsUnsigned) const;
     832             : 
     833             :   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
     834             :   /// Three cases are handled: 1. scalar instruction 2. vector instruction
     835             :   /// 3. scalar instruction which is to be vectorized with VF.
     836             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     837             :                             ArrayRef<Value *> Args, FastMathFlags FMF,
     838             :                             unsigned VF = 1) const;
     839             : 
     840             :   /// \returns The cost of Intrinsic instructions. Types analysis only.
     841             :   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
     842             :   /// arguments and the return value will be computed based on types.
     843             :   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
     844             :                             ArrayRef<Type *> Tys, FastMathFlags FMF,
     845             :                             unsigned ScalarizationCostPassed = UINT_MAX) const;
     846             : 
     847             :   /// \returns The cost of Call instructions.
     848             :   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
     849             : 
     850             :   /// \returns The number of pieces into which the provided type must be
     851             :   /// split during legalization. Zero is returned when the answer is unknown.
     852             :   unsigned getNumberOfParts(Type *Tp) const;
     853             : 
     854             :   /// \returns The cost of the address computation. For most targets this can be
     855             :   /// merged into the instruction indexing mode. Some targets might want to
     856             :   /// distinguish between address computation for memory operations on vector
     857             :   /// types and scalar types. Such targets should override this function.
     858             :   /// The 'SE' parameter holds pointer for the scalar evolution object which
     859             :   /// is used in order to get the Ptr step value in case of constant stride.
     860             :   /// The 'Ptr' parameter holds SCEV of the access pointer.
     861             :   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
     862             :                                 const SCEV *Ptr = nullptr) const;
     863             : 
     864             :   /// \returns The cost, if any, of keeping values of the given types alive
     865             :   /// over a callsite.
     866             :   ///
     867             :   /// Some types may require the use of register classes that do not have
     868             :   /// any callee-saved registers, so would require a spill and fill.
     869             :   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
     870             : 
     871             :   /// \returns True if the intrinsic is a supported memory intrinsic.  Info
     872             :   /// will contain additional information - whether the intrinsic may write
     873             :   /// or read to memory, volatility and the pointer.  Info is undefined
     874             :   /// if false is returned.
     875             :   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
     876             : 
     877             :   /// \returns The maximum element size, in bytes, for an element
     878             :   /// unordered-atomic memory intrinsic.
     879             :   unsigned getAtomicMemIntrinsicMaxElementSize() const;
     880             : 
     881             :   /// \returns A value which is the result of the given memory intrinsic.  New
     882             :   /// instructions may be created to extract the result from the given intrinsic
     883             :   /// memory operation.  Returns nullptr if the target cannot create a result
     884             :   /// from the given intrinsic.
     885             :   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
     886             :                                            Type *ExpectedType) const;
     887             : 
     888             :   /// \returns The type to use in a loop expansion of a memcpy call.
     889             :   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
     890             :                                   unsigned SrcAlign, unsigned DestAlign) const;
     891             : 
     892             :   /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
     893             :   /// \param RemainingBytes The number of bytes to copy.
     894             :   ///
     895             :   /// Calculates the operand types to use when copying \p RemainingBytes of
     896             :   /// memory, where source and destination alignments are \p SrcAlign and
     897             :   /// \p DestAlign respectively.
     898             :   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
     899             :                                          LLVMContext &Context,
     900             :                                          unsigned RemainingBytes,
     901             :                                          unsigned SrcAlign,
     902             :                                          unsigned DestAlign) const;
     903             : 
  /// \returns True if the two functions have compatible attributes for inlining
  /// purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// The type of load/store indexing.
  // NOTE: keep the declaration order stable; targets may rely on these
  // enumerator values.
  enum MemIndexedMode {
    MIM_Unindexed,  ///< No indexing.
    MIM_PreInc,     ///< Pre-incrementing.
    MIM_PreDec,     ///< Pre-decrementing.
    MIM_PostInc,    ///< Post-incrementing.
    MIM_PostDec     ///< Post-decrementing.
  };

  /// \returns True if the specified indexed load for the given type is legal.
  /// \param Mode The kind of indexed addressing (see MemIndexedMode).
  /// \param Ty The type of the value being loaded.
  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns True if the specified indexed store for the given type is legal.
  /// \param Mode The kind of indexed addressing (see MemIndexedMode).
  /// \param Ty The type of the value being stored.
  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     927             : 
  /// \returns True if the load instruction is legal to vectorize.
  bool isLegalToVectorizeLoad(LoadInst *LI) const;

  /// \returns True if the store instruction is legal to vectorize.
  bool isLegalToVectorizeStore(StoreInst *SI) const;

  /// \returns True if it is legal to vectorize the given load chain.
  /// \param ChainSizeInBytes The total size of the chain in bytes.
  /// \param Alignment The alignment of the chain.
  /// \param AddrSpace The address space the chain is loaded from.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given store chain.
  /// \param ChainSizeInBytes The total size of the chain in bytes.
  /// \param Alignment The alignment of the chain.
  /// \param AddrSpace The address space the chain is stored to.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// ChainSizeInBytes loads or has a better vector factor.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// ChainSizeInBytes stores or has a better vector factor.
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
     955             : 
     956             :   /// Flags describing the kind of vector reduction.
     957             :   struct ReductionFlags {
     958         181 :     ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
     959             :     bool IsMaxOp;  ///< If the op a min/max kind, true if it's a max operation.
     960             :     bool IsSigned; ///< Whether the operation is a signed int reduction.
     961             :     bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
     962             :   };
     963             : 
  /// \returns True if the target wants to handle the given reduction idiom in
  /// the intrinsics form instead of the shuffle form.
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const;

  /// \returns True if the target wants to expand the given reduction intrinsic
  /// into a shuffle sequence.
  bool shouldExpandReduction(const IntrinsicInst *II) const;
  /// @}

private:
  /// Estimate the latency of the specified instruction.
  /// Returns 1 as the default value.
  int getInstructionLatency(const Instruction *I) const;

  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  int getInstructionThroughput(const Instruction *I) const;

  /// The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  /// The type-erased target implementation; every public query above
  /// forwards through this object.
  std::unique_ptr<Concept> TTIImpl;
};
     992             : };
     993             : 
/// The abstract interface that type-erases a concrete target implementation.
///
/// Each pure virtual method mirrors one public query on TargetTransformInfo;
/// the Model template below forwards every call to the wrapped target
/// implementation. When adding a query to TargetTransformInfo's public API,
/// add the matching pure virtual here and an override in Model.
class TargetTransformInfo::Concept {
public:
  // Pure virtual destructor: an out-of-line definition is still required.
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
  virtual int getCallCost(const Function *F, int NumArgs) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                    unsigned &JTSize) = 0;
  virtual int
  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                             TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool canMacroFuseCmp() = 0;
  virtual bool shouldFavorPostInc() const = 0;
  virtual bool isLegalMaskedStore(Type *DataType) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool LSRWithInstrQueries() = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool useAA() = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual unsigned getJumpBufAlignment() = 0;
  virtual unsigned getJumpBufSize() = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool useColdCCForColdCall(Function &F) = 0;
  virtual unsigned
  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                    unsigned VF) = 0;
  virtual bool supportsEfficientVectorElementLoadStore() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                    Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
  virtual unsigned getMinVectorRegisterBitWidth() = 0;
  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() = 0;
  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
  virtual unsigned getPrefetchDistance() = 0;
  virtual unsigned getMinPrefetchStride() = 0;
  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo,
                         ArrayRef<const Value *> Args) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                               const Instruction *I) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                Type *CondTy, const Instruction *I) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                              unsigned AddressSpace, const Instruction *I) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment,
                                    unsigned AddressSpace) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     Value *Ptr, bool VariableMask,
                                     unsigned Alignment) = 0;
  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                         unsigned Factor,
                                         ArrayRef<unsigned> Indices,
                                         unsigned Alignment,
                                         unsigned AddressSpace) = 0;
  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                         bool IsPairwiseForm) = 0;
  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                     bool IsPairwiseForm, bool IsUnsigned) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                      ArrayRef<Type *> Tys, FastMathFlags FMF,
                      unsigned ScalarizationCostPassed) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
         ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy,
                               ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                        const SCEV *Ptr) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                          unsigned SrcAlign,
                                          unsigned DestAlign) const = 0;
  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           unsigned Alignment,
                                           unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            unsigned Alignment,
                                            unsigned AddrSpace) const = 0;
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const = 0;
  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                     ReductionFlags) const = 0;
  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
  virtual int getInstructionLatency(const Instruction *I) = 0;
};
    1173             : 
    1174             : template <typename T>
    1175             : class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
    1176             :   T Impl;
    1177             : 
    1178             : public:
    1179     2760335 :   Model(T Impl) : Impl(std::move(Impl)) {}
    1180     2760241 :   ~Model() override {}
    1181             : 
    1182           0 :   const DataLayout &getDataLayout() const override {
    1183           0 :     return Impl.getDataLayout();
    1184             :   }
    1185             : 
    1186           0 :   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    1187           0 :     return Impl.getOperationCost(Opcode, Ty, OpTy);
    1188             :   }
    1189       34754 :   int getGEPCost(Type *PointeeType, const Value *Ptr,
    1190             :                  ArrayRef<const Value *> Operands) override {
    1191       34754 :     return Impl.getGEPCost(PointeeType, Ptr, Operands);
    1192             :   }
    1193           0 :   int getExtCost(const Instruction *I, const Value *Src) override {
    1194           0 :     return Impl.getExtCost(I, Src);
    1195             :   }
    1196           0 :   int getCallCost(FunctionType *FTy, int NumArgs) override {
    1197           0 :     return Impl.getCallCost(FTy, NumArgs);
    1198             :   }
    1199           0 :   int getCallCost(const Function *F, int NumArgs) override {
    1200           0 :     return Impl.getCallCost(F, NumArgs);
    1201             :   }
    1202           0 :   int getCallCost(const Function *F,
    1203             :                   ArrayRef<const Value *> Arguments) override {
    1204           0 :     return Impl.getCallCost(F, Arguments);
    1205             :   }
    1206      239054 :   unsigned getInliningThresholdMultiplier() override {
    1207      239054 :     return Impl.getInliningThresholdMultiplier();
    1208             :   }
    1209           0 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1210             :                        ArrayRef<Type *> ParamTys) override {
    1211           0 :     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
    1212             :   }
    1213           6 :   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    1214             :                        ArrayRef<const Value *> Arguments) override {
    1215           6 :     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
    1216             :   }
    1217     3555218 :   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    1218     3555218 :     return Impl.getUserCost(U, Operands);
    1219             :   }
    1220     1646160 :   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
    1221      775432 :   bool isSourceOfDivergence(const Value *V) override {
    1222      775432 :     return Impl.isSourceOfDivergence(V);
    1223             :   }
    1224             : 
    1225      230015 :   bool isAlwaysUniform(const Value *V) override {
    1226      230015 :     return Impl.isAlwaysUniform(V);
    1227             :   }
    1228             : 
    1229       21222 :   unsigned getFlatAddressSpace() override {
    1230       41394 :     return Impl.getFlatAddressSpace();
    1231             :   }
    1232             : 
    1233      962135 :   bool isLoweredToCall(const Function *F) override {
    1234      962135 :     return Impl.isLoweredToCall(F);
    1235             :   }
    1236        6335 :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
    1237             :                                UnrollingPreferences &UP) override {
    1238        6335 :     return Impl.getUnrollingPreferences(L, SE, UP);
    1239             :   }
    1240       24810 :   bool isLegalAddImmediate(int64_t Imm) override {
    1241       24810 :     return Impl.isLegalAddImmediate(Imm);
    1242             :   }
    1243       26099 :   bool isLegalICmpImmediate(int64_t Imm) override {
    1244       26099 :     return Impl.isLegalICmpImmediate(Imm);
    1245             :   }
    1246      394455 :   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1247             :                              bool HasBaseReg, int64_t Scale,
    1248             :                              unsigned AddrSpace,
    1249             :                              Instruction *I) override {
    1250             :     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
    1251      394455 :                                       Scale, AddrSpace, I);
    1252             :   }
    1253       94700 :   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
    1254             :                      TargetTransformInfo::LSRCost &C2) override {
    1255       94700 :     return Impl.isLSRCostLess(C1, C2);
    1256             :   }
    1257       24339 :   bool canMacroFuseCmp() override {
    1258       24339 :     return Impl.canMacroFuseCmp();
    1259             :   }
    1260      195979 :   bool shouldFavorPostInc() const override {
    1261      195979 :     return Impl.shouldFavorPostInc();
    1262             :   }
    1263         418 :   bool isLegalMaskedStore(Type *DataType) override {
    1264         418 :     return Impl.isLegalMaskedStore(DataType);
    1265             :   }
    1266         431 :   bool isLegalMaskedLoad(Type *DataType) override {
    1267         431 :     return Impl.isLegalMaskedLoad(DataType);
    1268             :   }
    1269         596 :   bool isLegalMaskedScatter(Type *DataType) override {
    1270         596 :     return Impl.isLegalMaskedScatter(DataType);
    1271             :   }
    1272         713 :   bool isLegalMaskedGather(Type *DataType) override {
    1273         713 :     return Impl.isLegalMaskedGather(DataType);
    1274             :   }
    1275          32 :   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    1276          32 :     return Impl.hasDivRemOp(DataType, IsSigned);
    1277             :   }
    1278         562 :   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    1279         562 :     return Impl.hasVolatileVariant(I, AddrSpace);
    1280             :   }
    1281        1145 :   bool prefersVectorizedAddressing() override {
    1282        1145 :     return Impl.prefersVectorizedAddressing();
    1283             :   }
    1284       85382 :   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    1285             :                            bool HasBaseReg, int64_t Scale,
    1286             :                            unsigned AddrSpace) override {
    1287             :     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
    1288       85382 :                                      Scale, AddrSpace);
    1289             :   }
    1290       49511 :   bool LSRWithInstrQueries() override {
    1291       49511 :     return Impl.LSRWithInstrQueries();
    1292             :   }
    1293       19641 :   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    1294       19641 :     return Impl.isTruncateFree(Ty1, Ty2);
    1295             :   }
    1296        3413 :   bool isProfitableToHoist(Instruction *I) override {
    1297        3413 :     return Impl.isProfitableToHoist(I);
    1298             :   }
    1299        1116 :   bool useAA() override { return Impl.useAA(); }
    1300         662 :   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
    1301           0 :   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
    1302           0 :   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
    1303         417 :   bool shouldBuildLookupTables() override {
    1304         417 :     return Impl.shouldBuildLookupTables();
    1305             :   }
    1306        1102 :   bool shouldBuildLookupTablesForConstant(Constant *C) override {
    1307        1102 :     return Impl.shouldBuildLookupTablesForConstant(C);
    1308             :   }
    1309           2 :   bool useColdCCForColdCall(Function &F) override {
    1310           2 :     return Impl.useColdCCForColdCall(F);
    1311             :   }
    1312             : 
    1313         716 :   unsigned getScalarizationOverhead(Type *Ty, bool Insert,
    1314             :                                     bool Extract) override {
    1315         716 :     return Impl.getScalarizationOverhead(Ty, Insert, Extract);
    1316             :   }
    1317         754 :   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    1318             :                                             unsigned VF) override {
    1319         754 :     return Impl.getOperandsScalarizationOverhead(Args, VF);
    1320             :   }
    1321             : 
    1322         385 :   bool supportsEfficientVectorElementLoadStore() override {
    1323         385 :     return Impl.supportsEfficientVectorElementLoadStore();
    1324             :   }
    1325             : 
    1326          67 :   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    1327          67 :     return Impl.enableAggressiveInterleaving(LoopHasReductions);
    1328             :   }
    1329      170523 :   const MemCmpExpansionOptions *enableMemCmpExpansion(
    1330             :       bool IsZeroCmp) const override {
    1331      170523 :     return Impl.enableMemCmpExpansion(IsZeroCmp);
    1332             :   }
    1333         840 :   bool enableInterleavedAccessVectorization() override {
    1334         840 :     return Impl.enableInterleavedAccessVectorization();
    1335             :   }
    1336         129 :   bool isFPVectorizationPotentiallyUnsafe() override {
    1337         129 :     return Impl.isFPVectorizationPotentiallyUnsafe();
    1338             :   }
    1339        1065 :   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
    1340             :                                       unsigned BitWidth, unsigned AddressSpace,
    1341             :                                       unsigned Alignment, bool *Fast) override {
    1342             :     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
    1343        1065 :                                                Alignment, Fast);
    1344             :   }
    1345        2351 :   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    1346        2351 :     return Impl.getPopcntSupport(IntTyWidthInBit);
    1347             :   }
    1348          58 :   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
    1349             : 
    1350          27 :   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    1351          27 :     return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
    1352             :   }
    1353             : 
  // Cost of a floating-point operation on \p Ty, forwarded to the target.
  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  // Code-size cost of materializing immediate \p Imm as operand \p Idx of
  // opcode \p Opc (as opposed to the speed-oriented getIntImmCost overloads
  // below).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  // Cost of materializing the immediate in isolation.
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  // Cost of the immediate when used as operand \p Idx of instruction \p Opc.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
  }
  // Cost of the immediate when used as operand \p Idx of intrinsic \p IID.
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(IID, Idx, Imm, Ty);
  }
  // Register-file and vector-width queries. \p Vector selects between the
  // scalar and vector register classes.
  unsigned getNumberOfRegisters(bool Vector) override {
    return Impl.getNumberOfRegisters(Vector);
  }
  unsigned getRegisterBitWidth(bool Vector) const override {
    return Impl.getRegisterBitWidth(Vector);
  }
  unsigned getMinVectorRegisterBitWidth() override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    return Impl.shouldMaximizeVectorBandwidth(OptSize);
  }
  // Smallest vectorization factor the target wants for elements of
  // \p ElemWidth bits.
  unsigned getMinimumVF(unsigned ElemWidth) const override {
    return Impl.getMinimumVF(ElemWidth);
  }
  // \p AllowPromotionWithoutCommonHeader is an out-parameter filled in by the
  // target implementation.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  // Cache-hierarchy and software-prefetch queries, forwarded unchanged.
  unsigned getCacheLineSize() override {
    return Impl.getCacheLineSize();
  }
  // Size/associativity may be unknown for a given level, hence Optional.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
    return Impl.getCacheSize(Level);
  }
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
    return Impl.getCacheAssociativity(Level);
  }
  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
  unsigned getMinPrefetchStride() override {
    return Impl.getMinPrefetchStride();
  }
  unsigned getMaxPrefetchIterationsAhead() override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  // Maximum interleave (unroll) factor the target recommends for
  // vectorization factor \p VF.
  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  // Estimates how many case clusters \p SI lowers to; \p JTSize is an
  // out-parameter receiving the jump-table size.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
  }
  // Per-instruction cost-model hooks. Every override forwards verbatim to the
  // concrete target implementation; see the corresponding methods on
  // TargetTransformInfo for the full parameter semantics.
  unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo,
                         ArrayRef<const Value *> Args) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  // \p I (may be null) is the existing instruction, when costing an
  // already-materialized cast.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, I);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
  }
  // Reduction costs: \p IsPairwiseForm selects the pairwise reduction shape
  // over the splitting (tree) shape.
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm, bool IsUnsigned) override {
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  }
  // Intrinsic cost based on argument types (scalarization cost supplied by
  // the caller)...
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
               FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                      ScalarizationCostPassed);
  }
  // ...and based on actual argument values plus a vectorization factor.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
       ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  // Miscellaneous cost queries and target memory-intrinsic hooks.
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  // \p SE / \p Ptr may be null; they allow the target to recognize cheap
  // (e.g. loop-invariant or strided) address computations.
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  // Fills \p Info when \p Inst is a target-specific memory intrinsic; returns
  // whether it was recognized.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  // Element type to use when expanding a memcpy into a load/store loop.
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign,
                                  unsigned DestAlign) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
  }
  // Operand types for the residual (non-loop) tail of an expanded memcpy;
  // results are appended to \p OpsOut.
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAlign, DestAlign);
  }
  // Inlining-compatibility, indexed-access legality, and load/store
  // vectorization legality queries.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  // Note: the DataLayout is supplied here rather than by the caller, so the
  // Impl signature takes one extra argument than the public interface.
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  // Preferred vectorization factor for a chain of loads/stores of the given
  // total size.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  // Reduction-lowering preferences and instruction-latency query.
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
  }
  // Whether the given experimental vector-reduction intrinsic should be
  // expanded to shuffles rather than handled natively.
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  int getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }
    1571             : };
    1572             : 
/// Out-of-line template constructor.
///
/// Type-erases the concrete target implementation \p Impl behind the
/// heap-allocated Model<T> stored in TTIImpl. \p Impl is taken by value and
/// copied into the Model; ownership of the allocation presumably rests with
/// TTIImpl's declared type (defined earlier in this file) -- confirm there.
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
    1576             : 
    1577             : /// Analysis pass providing the \c TargetTransformInfo.
    1578             : ///
    1579             : /// The core idea of the TargetIRAnalysis is to expose an interface through
    1580             : /// which LLVM targets can analyze and provide information about the middle
    1581             : /// end's target-independent IR. This supports use cases such as target-aware
    1582             : /// cost modeling of IR constructs.
    1583             : ///
    1584             : /// This is a function analysis because much of the cost modeling for targets
    1585             : /// is done in a subtarget specific way and LLVM supports compiling different
    1586             : /// functions targeting different subtargets in order to support runtime
    1587             : /// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  /// The analysis result type required by the new pass manager.
  typedef TargetTransformInfo Result;

  /// Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  /// Produce the TTI result for \p F by invoking the stored callback.
  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  /// Unique identity for this analysis, used by AnalysisInfoMixin.
  static AnalysisKey Key;

  /// The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};
    1639             : 
    1640             : /// Wrapper pass for TargetTransformInfo.
    1641             : ///
    1642             : /// This pass can be constructed from a TTI object which it stores internally
    1643             : /// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  /// The analysis used to produce a per-function TTI on demand.
  TargetIRAnalysis TIRA;
  /// Storage for the produced TTI; Optional because construction is deferred
  /// (presumably populated lazily by getTTI -- its definition is elsewhere).
  Optional<TargetTransformInfo> TTI;

  // Out-of-line virtual method to anchor this class's vtable to one TU.
  virtual void anchor();

public:
  static char ID;

  /// We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  /// Return (computing if necessary) the TTI for function \p F.
  TargetTransformInfo &getTTI(const Function &F);
};
    1663             : 
    1664             : /// Create an analysis pass wrapper around a TTI object.
    1665             : ///
    1666             : /// This analysis pass just holds the TTI instance and makes it available to
    1667             : /// clients.
    1668             : ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    1669             : 
    1670             : } // End llvm namespace
    1671             : 
    1672             : #endif

Generated by: LCOV version 1.13