1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/AtomicOrdering.h"
29 #include "llvm/Support/DataTypes.h"
30 #include <functional>
31 
32 namespace llvm {
33 
34 namespace Intrinsic {
35 enum ID : unsigned;
36 }
37 
38 class Function;
39 class GlobalValue;
40 class IntrinsicInst;
41 class LoadInst;
42 class Loop;
43 class SCEV;
44 class ScalarEvolution;
45 class StoreInst;
46 class SwitchInst;
47 class Type;
48 class User;
49 class Value;
50 
51 /// Information about a load/store intrinsic defined by the target.
52 struct MemIntrinsicInfo {
53  /// This is the pointer that the intrinsic is loading from or storing to.
54  /// If this is non-null, then analysis/optimization passes can assume that
55  /// this intrinsic is functionally equivalent to a load/store from this
56  /// pointer.
57  Value *PtrVal = nullptr;
58 
59  // Ordering for atomic operations.
60  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
61 
62  // Same Id is set by the target for corresponding load/store intrinsics.
63  unsigned short MatchingId = 0;
64 
65  bool ReadMem = false;
66  bool WriteMem = false;
67  bool IsVolatile = false;
68 
69  bool isUnordered() const {
70  return (Ordering == AtomicOrdering::NotAtomic ||
71  Ordering == AtomicOrdering::Unordered) && !IsVolatile;
72  }
73 };
74 
75 /// This pass provides access to the codegen interfaces that are needed
76 /// for IR-level transformations.
77 class TargetTransformInfo {
78 public:
79  /// Construct a TTI object using a type implementing the \c Concept
80  /// API below.
81  ///
82  /// This is used by targets to construct a TTI wrapping their target-specific
83  /// implementation that encodes appropriate costs for their target.
84  template <typename T> TargetTransformInfo(T Impl);
85 
86  /// Construct a baseline TTI object using a minimal implementation of
87  /// the \c Concept API below.
88  ///
89  /// The TTI implementation will reflect the information in the DataLayout
90  /// provided if non-null.
91  explicit TargetTransformInfo(const DataLayout &DL);
92 
93  // Provide move semantics.
94  TargetTransformInfo(TargetTransformInfo &&Arg);
95  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
96 
97  // We need to define the destructor out-of-line to define our sub-classes
98  // out-of-line.
99  ~TargetTransformInfo();
100 
101  /// Handle the invalidation of this information.
102  ///
103  /// When used as a result of \c TargetIRAnalysis this method will be called
104  /// when the function this was computed for changes. When it returns false,
105  /// the information is preserved across those changes.
106  bool invalidate(Function &, const PreservedAnalyses &,
107                  FunctionAnalysisManager::Invalidator &) {
108  // FIXME: We should probably in some way ensure that the subtarget
109  // information for a function hasn't changed.
110  return false;
111  }
112 
113  /// \name Generic Target Information
114  /// @{
115 
116  /// The kind of cost model.
117  ///
118  /// There are several different cost models that can be customized by the
119  /// target. The normalization of each cost model may be target specific.
120  enum TargetCostKind {
121  TCK_RecipThroughput, ///< Reciprocal throughput.
122  TCK_Latency, ///< The latency of the instruction.
123  TCK_CodeSize ///< Instruction code size.
124  };
125 
126  /// Query the cost of a specified instruction.
127  ///
128  /// Clients should use this interface to query the cost of an existing
129  /// instruction. The instruction must have a valid parent (basic block).
130  ///
131  /// Note, this method does not cache the cost calculation and it
132  /// can be expensive in some cases.
133  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
134  switch (kind) {
135  case TCK_RecipThroughput:
136  return getInstructionThroughput(I);
137 
138  case TCK_Latency:
139  return getInstructionLatency(I);
140 
141  case TCK_CodeSize:
142  return getUserCost(I);
143  }
144  llvm_unreachable("Unknown instruction cost kind");
145  }
146 
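 // Example (illustrative sketch, not part of the original header): a client
 // pass that has obtained a TargetTransformInfo &TTI from TargetIRAnalysis
 // could sum reciprocal-throughput costs over a basic block `BB`; both names
 // are assumptions of this example.
 //
 //   int Cost = 0;
 //   for (const Instruction &I : BB)
 //     Cost += TTI.getInstructionCost(&I,
 //                                    TargetTransformInfo::TCK_RecipThroughput);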
147  /// Underlying constants for 'cost' values in this interface.
148  ///
149  /// Many APIs in this interface return a cost. This enum defines the
150  /// fundamental values that should be used to interpret (and produce) those
151  /// costs. The costs are returned as an int rather than a member of this
152  /// enumeration because it is expected that the cost of one IR instruction
153  /// may have a multiplicative factor to it or otherwise won't fit directly
154  /// into the enum. Moreover, it is common to sum or average costs which works
155  /// better as simple integral values. Thus this enum only provides constants.
156  /// Also note that the returned costs are signed integers to make it natural
157  /// to add, subtract, and test with zero (a common boundary condition). It is
158  /// not expected that 2^32 is a realistic cost to be modeling at any point.
159  ///
160  /// Note that these costs should usually reflect the intersection of code-size
161  /// cost and execution cost. A free instruction is typically one that folds
162  /// into another instruction. For example, reg-to-reg moves can often be
163  /// skipped by renaming the registers in the CPU, but they still are encoded
164  /// and thus wouldn't be considered 'free' here.
165  enum TargetCostConstants {
166  TCC_Free = 0, ///< Expected to fold away in lowering.
167  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
168  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
169  };
170 
171  /// Estimate the cost of a specific operation when lowered.
172  ///
173  /// Note that this is designed to work on an arbitrary synthetic opcode, and
174  /// thus work for hypothetical queries before an instruction has even been
175  /// formed. However, this does *not* work for GEPs, and must not be called
176  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
177  /// analyzing a GEP's cost requires more information.
178  ///
179  /// Typically only the result type is required, and the operand type can be
180  /// omitted. However, if the opcode is one of the cast instructions, the
181  /// operand type is required.
182  ///
183  /// The returned cost is defined in terms of \c TargetCostConstants, see its
184  /// comments for a detailed explanation of the cost values.
185  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
186 
187  /// Estimate the cost of a GEP operation when lowered.
188  ///
189  /// The contract for this function is the same as \c getOperationCost except
190  /// that it supports an interface that provides extra information specific to
191  /// the GEP operation.
192  int getGEPCost(Type *PointeeType, const Value *Ptr,
193  ArrayRef<const Value *> Operands) const;
194 
195  /// Estimate the cost of an EXT operation when lowered.
196  ///
197  /// The contract for this function is the same as \c getOperationCost except
198  /// that it supports an interface that provides extra information specific to
199  /// the EXT operation.
200  int getExtCost(const Instruction *I, const Value *Src) const;
201 
202  /// Estimate the cost of a function call when lowered.
203  ///
204  /// The contract for this is the same as \c getOperationCost except that it
205  /// supports an interface that provides extra information specific to call
206  /// instructions.
207  ///
208  /// This is the most basic query for estimating call cost: it only knows the
209  /// function type and (potentially) the number of arguments at the call site.
210  /// The latter is only interesting for varargs function types.
211  int getCallCost(FunctionType *FTy, int NumArgs = -1,
212  const User *U = nullptr) const;
213 
214  /// Estimate the cost of calling a specific function when lowered.
215  ///
216  /// This overload adds the ability to reason about the particular function
217  /// being called in the event it is a library call with special lowering.
218  int getCallCost(const Function *F, int NumArgs = -1,
219  const User *U = nullptr) const;
220 
221  /// Estimate the cost of calling a specific function when lowered.
222  ///
223  /// This overload allows specifying a set of candidate argument values.
224  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
225  const User *U = nullptr) const;
226 
227  /// \returns A value by which our inlining threshold should be multiplied.
228  /// This is primarily used to bump up the inlining threshold wholesale on
229  /// targets where calls are unusually expensive.
230  ///
231  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
232  /// individual classes of instructions would be better.
233  unsigned getInliningThresholdMultiplier() const;
234 
235  /// Estimate the cost of an intrinsic when lowered.
236  ///
237  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
238  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
239  ArrayRef<Type *> ParamTys,
240  const User *U = nullptr) const;
241 
242  /// Estimate the cost of an intrinsic when lowered.
243  ///
244  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
245  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
246  ArrayRef<const Value *> Arguments,
247  const User *U = nullptr) const;
248 
249  /// \returns The expected cost of a memcpy, which could e.g. depend on the
250  /// source/destination type and alignment and the number of bytes copied.
251  int getMemcpyCost(const Instruction *I) const;
252 
253  /// \return The estimated number of case clusters when lowering \p SI.
254  /// \p JTSize is set to the size of the jump table only when \p SI is
255  /// suitable for a jump table.
256  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
257  unsigned &JTSize) const;
258 
259  /// Estimate the cost of a given IR user when lowered.
260  ///
261  /// This can estimate the cost of either a ConstantExpr or Instruction when
262  /// lowered. It has two primary advantages over the \c getOperationCost and
263  /// \c getGEPCost above, and one significant disadvantage: it can only be
264  /// used when the IR construct has already been formed.
265  ///
266  /// The advantages are that it can inspect the SSA use graph to reason more
267  /// accurately about the cost. For example, all-constant-GEPs can often be
268  /// folded into a load or other instruction, but if they are used in some
269  /// other context they may not be folded. This routine can distinguish such
270  /// cases.
271  ///
272  /// \p Operands is a list of operands which can be a result of transformations
273  /// of the current operands. The number of operands on the list must equal
274  /// the number of current operands the IR user has, and their order on the
275  /// list must be the same as the order of the current operands the IR user
276  /// has.
277  ///
278  /// The returned cost is defined in terms of \c TargetCostConstants, see its
279  /// comments for a detailed explanation of the cost values.
280  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
281 
282  /// This is a helper function which calls the two-argument getUserCost
283  /// with \p Operands which are the current operands U has.
284  int getUserCost(const User *U) const {
285    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
286  U->value_op_end());
287  return getUserCost(U, Operands);
288  }
289 
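 // Example (illustrative sketch): the two-argument form can be used to ask
 // what a user would cost if one of its operands were replaced, without
 // mutating the IR. `GEP` and `NewIdx` are hypothetical values here.
 //
 //   SmallVector<const Value *, 4> Ops(GEP->value_op_begin(),
 //                                     GEP->value_op_end());
 //   Ops.back() = NewIdx;            // substitute a candidate operand
 //   int Cost = TTI.getUserCost(GEP, Ops);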
290  /// Return true if branch divergence exists.
291  ///
292  /// Branch divergence has a significantly negative impact on GPU performance
293  /// when threads in the same wavefront take different paths due to conditional
294  /// branches.
295  bool hasBranchDivergence() const;
296 
297  /// Returns whether V is a source of divergence.
298  ///
299  /// This function provides the target-dependent information for
300  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
301  /// builds the dependency graph, and then runs the reachability algorithm
302  /// starting with the sources of divergence.
303  bool isSourceOfDivergence(const Value *V) const;
304 
305  // Returns true for the target-specific set of operations
306  // which produce a uniform result even when taking
307  // non-uniform arguments.
308  bool isAlwaysUniform(const Value *V) const;
309 
310  /// Returns the address space ID for a target's 'flat' address space. Note
311  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
312  /// refers to as the generic address space. The flat address space is a
313  /// generic address space that can be used to access multiple segments of memory
314  /// with different address spaces. Access of a memory location through a
315  /// pointer with this address space is expected to be legal but slower
316  /// compared to the same memory location accessed through a pointer with a
317  /// different address space.
318  ///
319  /// This is for targets with different pointer representations which can
320  /// be converted with the addrspacecast instruction. If a pointer is converted
321  /// to this address space, optimizations should attempt to replace the access
322  /// with the source address space.
323  ///
324  /// \returns ~0u if the target does not have such a flat address space to
325  /// optimize away.
326  unsigned getFlatAddressSpace() const;
327 
328  /// Test whether calls to a function lower to actual program function
329  /// calls.
330  ///
331  /// The idea is to test whether the program is likely to require a 'call'
332  /// instruction or equivalent in order to call the given function.
333  ///
334  /// FIXME: It's not clear that this is a good or useful query API. Clients
335  /// should probably move to simpler cost metrics using the above.
336  /// Alternatively, we could split the cost interface into distinct code-size
337  /// and execution-speed costs. This would allow modelling the core of this
338  /// query more accurately as a call is a single small instruction, but
339  /// incurs significant execution cost.
340  bool isLoweredToCall(const Function *F) const;
341 
342  struct LSRCost {
343  /// TODO: Some of these could be merged. Also, a lexical ordering
344  /// isn't always optimal.
345  unsigned Insns;
346  unsigned NumRegs;
347  unsigned AddRecCost;
348  unsigned NumIVMuls;
349  unsigned NumBaseAdds;
350  unsigned ImmCost;
351  unsigned SetupCost;
352  unsigned ScaleCost;
353  };
354 
355  /// Parameters that control the generic loop unrolling transformation.
356  struct UnrollingPreferences {
357  /// The cost threshold for the unrolled loop. Should be relative to the
358  /// getUserCost values returned by this API, and the expectation is that
359  /// the unrolled loop's instructions when run through that interface should
360  /// not exceed this cost. However, this is only an estimate. Also, specific
361  /// loops may be unrolled even with a cost above this threshold if deemed
362  /// profitable. Set this to UINT_MAX to disable the loop body cost
363  /// restriction.
364  unsigned Threshold;
365  /// If complete unrolling will reduce the cost of the loop, we will boost
366  /// the Threshold by a certain percent to allow more aggressive complete
367  /// unrolling. This value provides the maximum boost percentage that we
368  /// can apply to Threshold (The value should be no less than 100).
369  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
370  /// MaxPercentThresholdBoost / 100)
371  /// E.g. if complete unrolling reduces the loop execution time by 50%
372  /// then we boost the threshold by the factor of 2x. If unrolling is not
373  /// expected to reduce the running time, then we do not increase the
374  /// threshold.
375  unsigned MaxPercentThresholdBoost;
376  /// The cost threshold for the unrolled loop when optimizing for size (set
377  /// to UINT_MAX to disable).
378  unsigned OptSizeThreshold;
379  /// The cost threshold for the unrolled loop, like Threshold, but used
380  /// for partial/runtime unrolling (set to UINT_MAX to disable).
381  unsigned PartialThreshold;
382  /// The cost threshold for the unrolled loop when optimizing for size, like
383  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
384  /// UINT_MAX to disable).
385  unsigned PartialOptSizeThreshold;
386  /// A forced unrolling factor (the number of concatenated bodies of the
387  /// original loop in the unrolled loop body). When set to 0, the unrolling
388  /// transformation will select an unrolling factor based on the current cost
389  /// threshold and other factors.
390  unsigned Count;
391  /// A forced peeling factor (the number of bodies of the original loop
392  /// that should be peeled off before the loop body). When set to 0, the
393  /// unrolling transformation will select a peeling factor based on profile
394  /// information and other factors.
395  unsigned PeelCount;
396  /// Default unroll count for loops with run-time trip count.
397  unsigned DefaultUnrollRuntimeCount;
398  // Set the maximum unrolling factor. The unrolling factor may be selected
399  // using the appropriate cost threshold, but may not exceed this number
400  // (set to UINT_MAX to disable). This does not apply in cases where the
401  // loop is being fully unrolled.
402  unsigned MaxCount;
403  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
404  /// applies even if full unrolling is selected. This allows a target to fall
405  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
406  unsigned FullUnrollMaxCount;
407  // Represents number of instructions optimized when "back edge"
408  // becomes "fall through" in unrolled loop.
409  // For now we count a conditional branch on a backedge and a comparison
410  // feeding it.
411  unsigned BEInsns;
412  /// Allow partial unrolling (unrolling of loops to expand the size of the
413  /// loop body, not only to eliminate small constant-trip-count loops).
414  bool Partial;
415  /// Allow runtime unrolling (unrolling of loops to expand the size of the
416  /// loop body even when the number of loop iterations is not known at
417  /// compile time).
418  bool Runtime;
419  /// Allow generation of a loop remainder (extra iterations after unroll).
420  bool AllowRemainder;
421  /// Allow emitting expensive instructions (such as divisions) when computing
422  /// the trip count of a loop for runtime unrolling.
423  bool AllowExpensiveTripCount;
424  /// Apply loop unroll on any kind of loop
425  /// (mainly to loops that fail runtime unrolling).
426  bool Force;
427  /// Allow using trip count upper bound to unroll loops.
428  bool UpperBound;
429  /// Allow peeling off loop iterations for loops with a low dynamic trip count.
430  bool AllowPeeling;
431  /// Allow unrolling of all the iterations of the runtime loop remainder.
432  bool UnrollRemainder;
433  /// Allow unroll and jam. Used to enable unroll and jam for the target.
434  bool UnrollAndJam;
435  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
436  /// value above is used during unroll and jam for the outer loop size.
437  /// This value is used in the same manner to limit the size of the inner
438  /// loop.
439  unsigned UnrollAndJamInnerLoopThreshold;
440  };
441 
442  /// Get target-customized preferences for the generic loop unrolling
443  /// transformation. The caller will initialize UP with the current
444  /// target-independent defaults.
445  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
446  UnrollingPreferences &UP) const;
447 
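 // Example (illustrative sketch): a target TTI implementation, here a
 // hypothetical MyTargetTTIImpl, would normally adjust only the fields it
 // cares about and leave the caller-initialized defaults alone.
 //
 //   void MyTargetTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 //                                                 UnrollingPreferences &UP) {
 //     UP.Partial = true;  // allow partial unrolling
 //     UP.MaxCount = 4;    // but never unroll by more than 4
 //   }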
448  /// @}
449 
450  /// \name Scalar Target Information
451  /// @{
452 
453  /// Flags indicating the kind of support for population count.
454  ///
455  /// Compared to the SW implementation, HW support is expected to
456  /// significantly boost the performance when the population is dense, and it
457  /// may or may not degrade performance if the population is sparse. HW
458  /// support is considered "Fast" if it can outperform, or is on a par
459  /// with, the SW implementation when the population is sparse; otherwise, it
460  /// is considered "Slow".
461  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
462 
463  /// Return true if the specified immediate is a legal add immediate, that
464  /// is, the target has add instructions which can add a register with the
465  /// immediate without having to materialize the immediate into a register.
466  bool isLegalAddImmediate(int64_t Imm) const;
467 
468  /// Return true if the specified immediate is a legal icmp immediate,
469  /// that is, the target has icmp instructions which can compare a register
470  /// against the immediate without having to materialize the immediate into a
471  /// register.
472  bool isLegalICmpImmediate(int64_t Imm) const;
473 
474  /// Return true if the addressing mode represented by AM is legal for
475  /// this target, for a load/store of the specified type.
476  /// The type may be VoidTy, in which case only return true if the addressing
477  /// mode is legal for a load/store of any legal type.
478  /// If target returns true in LSRWithInstrQueries(), I may be valid.
479  /// TODO: Handle pre/postinc as well.
480  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
481  bool HasBaseReg, int64_t Scale,
482  unsigned AddrSpace = 0,
483  Instruction *I = nullptr) const;
484 
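 // For reference, the addressing mode being queried is commonly described as
 //   BaseGV + BaseOffset + BaseReg + Scale * ScaleReg
 // where BaseReg participates only if HasBaseReg is true. For example, an x86
 // access such as [rbx + 4*rcx + 16] would correspond to HasBaseReg = true,
 // Scale = 4 and BaseOffset = 16 (the concrete mapping is illustrative only).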
485  /// Return true if the LSR cost of C1 is lower than C2.
486  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
487  TargetTransformInfo::LSRCost &C2) const;
488 
489  /// Return true if the target can fuse a compare and branch.
490  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
491  /// calculation for the instructions in a loop.
492  bool canMacroFuseCmp() const;
493 
494  /// \return True if LSR should make efforts to create/preserve post-inc
495  /// addressing mode expressions.
496  bool shouldFavorPostInc() const;
497 
498  /// Return true if LSR should make efforts to generate indexed addressing
499  /// modes that operate across loop iterations.
500  bool shouldFavorBackedgeIndex(const Loop *L) const;
501 
502  /// Return true if the target supports masked store.
503  bool isLegalMaskedStore(Type *DataType) const;
504  /// Return true if the target supports masked load.
505  bool isLegalMaskedLoad(Type *DataType) const;
506 
507  /// Return true if the target supports masked scatter.
508  bool isLegalMaskedScatter(Type *DataType) const;
509  /// Return true if the target supports masked gather.
510  bool isLegalMaskedGather(Type *DataType) const;
511 
512  /// Return true if the target supports masked compress store.
513  bool isLegalMaskedCompressStore(Type *DataType) const;
514  /// Return true if the target supports masked expand load.
515  bool isLegalMaskedExpandLoad(Type *DataType) const;
516 
517  /// Return true if the target has a unified operation to calculate division
518  /// and remainder. If so, the additional implicit multiplication and
519  /// subtraction required to calculate a remainder from division are free. This
520  /// can enable more aggressive transformations for division and remainder than
521  /// would typically be allowed using throughput or size cost models.
522  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
523 
524  /// Return true if the given instruction (assumed to be a memory access
525  /// instruction) has a volatile variant. If that's the case then we can avoid
526  /// addrspacecast to generic AS for volatile loads/stores. Default
527  /// implementation returns false, which prevents address space inference for
528  /// volatile loads/stores.
529  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
530 
531  /// Return true if the target doesn't mind addresses in vectors.
532  bool prefersVectorizedAddressing() const;
533 
534  /// Return the cost of the scaling factor used in the addressing
535  /// mode represented by AM for this target, for a load/store
536  /// of the specified type.
537  /// If the AM is supported, the return value must be >= 0.
538  /// If the AM is not supported, it returns a negative value.
539  /// TODO: Handle pre/postinc as well.
540  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
541  bool HasBaseReg, int64_t Scale,
542  unsigned AddrSpace = 0) const;
543 
544  /// Return true if the loop strength reduce pass should make
545  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
546  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
547  /// immediate offset and no index register.
548  bool LSRWithInstrQueries() const;
549 
550  /// Return true if it's free to truncate a value of type Ty1 to type
551  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
552  /// by referencing its sub-register AX.
553  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
554 
555  /// Return true if it is profitable to hoist instructions from the
556  /// then/else blocks to before the if.
557  bool isProfitableToHoist(Instruction *I) const;
558 
559  bool useAA() const;
560 
561  /// Return true if this type is legal.
562  bool isTypeLegal(Type *Ty) const;
563 
564  /// Returns the target's jmp_buf alignment in bytes.
565  unsigned getJumpBufAlignment() const;
566 
567  /// Returns the target's jmp_buf size in bytes.
568  unsigned getJumpBufSize() const;
569 
570  /// Return true if switches should be turned into lookup tables for the
571  /// target.
572  bool shouldBuildLookupTables() const;
573 
574  /// Return true if switches should be turned into lookup tables
575  /// containing this constant value for the target.
576  bool shouldBuildLookupTablesForConstant(Constant *C) const;
577 
578  /// Return true if the input function, which is cold at all call sites,
579  /// should use the coldcc calling convention.
580  bool useColdCCForColdCall(Function &F) const;
581 
582  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
583 
584  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
585  unsigned VF) const;
586 
587  /// If target has efficient vector element load/store instructions, it can
588  /// return true here so that insertion/extraction costs are not added to
589  /// the scalarization cost of a load/store.
590  bool supportsEfficientVectorElementLoadStore() const;
591 
592  /// Don't restrict interleaved unrolling to small loops.
593  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
594 
595  /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
596  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
597  struct MemCmpExpansionOptions {
598  // The list of available load sizes (in bytes), sorted in decreasing order.
599  SmallVector<unsigned, 8> LoadSizes;
600  // Set to true to allow overlapping loads. For example, 7-byte compares can
601  // be done with two 4-byte compares instead of 4+2+1-byte compares. This
602  // requires all loads in LoadSizes to be doable in an unaligned way.
603  bool AllowOverlappingLoads = false;
604  };
605  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
606 
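 // Example (illustrative sketch): a target whose widest scalar load is 8 bytes
 // might return options like the following; MyTargetTTIImpl and the static
 // storage are assumptions of this example.
 //
 //   const TargetTransformInfo::MemCmpExpansionOptions *
 //   MyTargetTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
 //     static const auto Options = [] {
 //       TargetTransformInfo::MemCmpExpansionOptions O;
 //       O.LoadSizes.push_back(8);
 //       O.LoadSizes.push_back(4);
 //       O.LoadSizes.push_back(2);
 //       O.LoadSizes.push_back(1);
 //       O.AllowOverlappingLoads = true;
 //       return O;
 //     }();
 //     return &Options;
 //   }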
607  /// Enable matching of interleaved access groups.
608  bool enableInterleavedAccessVectorization() const;
609 
610  /// Enable matching of interleaved access groups that contain predicated
611  /// accesses or gaps and are therefore vectorized using masked
612  /// vector loads/stores.
613  bool enableMaskedInterleavedAccessVectorization() const;
614 
615  /// Indicate that it is potentially unsafe to automatically vectorize
616  /// floating-point operations because vector and scalar floating-point
617  /// semantics may differ. For example, ARM NEON v7 SIMD math
618  /// does not support IEEE-754 denormal numbers, while depending on the
619  /// platform, scalar floating-point math does.
620  /// This applies to floating-point math operations and calls, not memory
621  /// operations, shuffles, or casts.
622  bool isFPVectorizationPotentiallyUnsafe() const;
623 
624  /// Determine if the target supports unaligned memory accesses.
625  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
626  unsigned BitWidth, unsigned AddressSpace = 0,
627  unsigned Alignment = 1,
628  bool *Fast = nullptr) const;
629 
630  /// Return hardware support for population count.
631  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
632 
633  /// Return true if the hardware has a fast square-root instruction.
634  bool haveFastSqrt(Type *Ty) const;
635 
636  /// Return true if it is faster to check if a floating-point value is NaN
637  /// (or not-NaN) versus a comparison against a constant FP zero value.
638  /// Targets should override this if materializing a 0.0 for comparison is
639  /// generally as cheap as checking for ordered/unordered.
640  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
641 
642  /// Return the expected cost of supporting the floating point operation
643  /// of the specified type.
644  int getFPOpCost(Type *Ty) const;
645 
646  /// Return the expected cost of materializing the given integer
647  /// immediate of the specified type.
648  int getIntImmCost(const APInt &Imm, Type *Ty) const;
649 
650  /// Return the expected cost of materialization for the given integer
651  /// immediate of the specified type for a given instruction. The cost can be
652  /// zero if the immediate can be folded into the specified instruction.
653  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
654  Type *Ty) const;
655  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
656  Type *Ty) const;
657 
658  /// Return the expected cost for the given integer when optimising
659  /// for size. This is different than the other integer immediate cost
660  /// functions in that it is subtarget agnostic. This is useful when you e.g.
661  /// target one ISA such as AArch32 but smaller encodings could be possible
662  /// with another such as Thumb. This return value is used as a penalty when
663  /// the total cost for a constant is calculated (the bigger the cost, the
664  /// more beneficial constant hoisting is).
665  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
666  Type *Ty) const;
667  /// @}
668 
669  /// \name Vector Target Information
670  /// @{
671 
672  /// The various kinds of shuffle patterns for vector queries.
673  enum ShuffleKind {
674  SK_Broadcast, ///< Broadcast element 0 to all other elements.
675  SK_Reverse, ///< Reverse the order of the vector.
676  SK_Select, ///< Selects elements from the corresponding lane of
677  ///< either source operand. This is equivalent to a
678  ///< vector select with a constant condition operand.
679  SK_Transpose, ///< Transpose two vectors.
680  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
681  SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
682  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
683  ///< with any shuffle mask.
684  SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
685  ///< shuffle mask.
686  };
687 
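 // For intuition, illustrative shufflevector masks on 4-element vectors:
 //   SK_Broadcast:        <0, 0, 0, 0>
 //   SK_Reverse:          <3, 2, 1, 0>
 //   SK_Select:           <0, 5, 2, 7>  (each lane keeps its index, picking
 //                                        from either source)
 //   SK_Transpose:        <0, 4, 2, 6>  (one of the two transposed outputs)
 //   SK_PermuteSingleSrc: any mask that draws from a single source vector.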
688  /// Additional information about an operand's possible values.
689  enum OperandValueKind {
690  OK_AnyValue, // Operand can have any value.
691  OK_UniformValue, // Operand is uniform (splat of a value).
692  OK_UniformConstantValue, // Operand is uniform constant.
693  OK_NonUniformConstantValue // Operand is a non uniform constant value.
694  };
695 
696  /// Additional properties of an operand's values.
697  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
698 
699  /// \return The number of scalar or vector registers that the target has.
700  /// If 'Vectors' is true, it returns the number of vector registers. If it is
701  /// set to false, it returns the number of scalar registers.
702  unsigned getNumberOfRegisters(bool Vector) const;
703 
704  /// \return The width of the largest scalar or vector register type.
705  unsigned getRegisterBitWidth(bool Vector) const;
706 
707  /// \return The width of the smallest vector register type.
708  unsigned getMinVectorRegisterBitWidth() const;
709 
710  /// \return True if the vectorization factor should be chosen to
711  /// make the vector of the smallest element type match the size of a
712  /// vector register. For wider element types, this could result in
713  /// creating vectors that span multiple vector registers.
714  /// If false, the vectorization factor will be chosen based on the
715  /// size of the widest element type.
716  bool shouldMaximizeVectorBandwidth(bool OptSize) const;
717 
718  /// \return The minimum vectorization factor for types of given element
719  /// bit width, or 0 if there is no minimum VF. The returned value only
720  /// applies when shouldMaximizeVectorBandwidth returns true.
721  unsigned getMinimumVF(unsigned ElemWidth) const;
722 
723  /// \return True if it should be considered for address type promotion.
724  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
725  /// profitable without finding other extensions fed by the same input.
726  bool shouldConsiderAddressTypePromotion(
727  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
728 
729  /// \return The size of a cache line in bytes.
730  unsigned getCacheLineSize() const;
731 
732  /// The possible cache levels
733  enum class CacheLevel {
734  L1D, // The L1 data cache
735  L2D, // The L2 data cache
736 
737  // We currently do not model L3 caches, as their sizes differ widely between
738  // microarchitectures. Also, we currently do not have a use for L3 cache
739  // size modeling yet.
740  };
741 
742  /// \return The size of the cache level in bytes, if available.
743  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
744 
745  /// \return The associativity of the cache level, if available.
746  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
747 
748  /// \return How much before a load we should place the prefetch instruction.
749  /// This is currently measured in number of instructions.
750  unsigned getPrefetchDistance() const;
751 
752  /// \return Some HW prefetchers can handle accesses up to a certain constant
753  /// stride. This is the minimum stride in bytes where it makes sense to start
754  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
755  unsigned getMinPrefetchStride() const;
756 
757  /// \return The maximum number of iterations to prefetch ahead. If the
758  /// required number of iterations is more than this number, no prefetching is
759  /// performed.
760  unsigned getMaxPrefetchIterationsAhead() const;
761 
762  /// \return The maximum interleave factor that any transform should try to
763  /// perform for this target. This number depends on the level of parallelism
764  /// and the number of execution units in the CPU.
765  unsigned getMaxInterleaveFactor(unsigned VF) const;
766 
767  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
768  static OperandValueKind getOperandInfo(Value *V,
769  OperandValueProperties &OpProps);
770 
771  /// This is an approximation of reciprocal throughput of a math/logic op.
772  /// A higher cost indicates less expected throughput.
773  /// From Agner Fog's guides, reciprocal throughput is "the average number of
774  /// clock cycles per instruction when the instructions are not part of a
775  /// limiting dependency chain."
776  /// Therefore, costs should be scaled to account for multiple execution units
777  /// on the target that can process this type of instruction. For example, if
778  /// there are 5 scalar integer units and 2 vector integer units that can
779  /// calculate an 'add' in a single cycle, this model should indicate that the
780  /// cost of the vector add instruction is 2.5 times the cost of the scalar
781  /// add instruction.
782  /// \p Args is an optional argument which holds the instruction operands
783  /// values so the TTI can analyze those values searching for special
784  /// cases or optimizations based on those values.
785  int getArithmeticInstrCost(
786  unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
787  OperandValueKind Opd2Info = OK_AnyValue,
788  OperandValueProperties Opd1PropInfo = OP_None,
789  OperandValueProperties Opd2PropInfo = OP_None,
790  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
791 
792  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
793  /// The index and subtype parameters are used by the subvector insertion and
794  /// extraction shuffle kinds to show the insert/extract point and the type of
795  /// the subvector being inserted/extracted.
796  /// NOTE: For subvector extractions Tp represents the source type.
797  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
798  Type *SubTp = nullptr) const;
799 
800  /// \return The expected cost of cast instructions, such as bitcast, trunc,
801  /// zext, etc. If there is an existing instruction that holds Opcode, it
802  /// may be passed in the 'I' parameter.
803  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
804  const Instruction *I = nullptr) const;
805 
806  /// \return The expected cost of a sign- or zero-extended vector extract. Use
807  /// -1 to indicate that there is no information about the index value.
808  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
809  unsigned Index = -1) const;
810 
811  /// \return The expected cost of control-flow related instructions such as
812  /// Phi, Ret, Br.
813  int getCFInstrCost(unsigned Opcode) const;
814 
815  /// \returns The expected cost of compare and select instructions. If there
816  /// is an existing instruction that holds Opcode, it may be passed in the
817  /// 'I' parameter.
818  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
819  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
820 
821  /// \return The expected cost of vector Insert and Extract.
822  /// Use -1 to indicate that there is no information on the index value.
823  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
824 
825  /// \return The cost of Load and Store instructions.
826  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
827  unsigned AddressSpace, const Instruction *I = nullptr) const;
828 
829  /// \return The cost of masked Load and Store instructions.
830  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
831  unsigned AddressSpace) const;
832 
833  /// \return The cost of Gather or Scatter operation
834  /// \p Opcode - is a type of memory access Load or Store
835  /// \p DataTy - a vector type of the data to be loaded or stored
836  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
837  /// \p VariableMask - true when the memory access is predicated with a mask
838  /// that is not a compile-time constant
839  /// \p Alignment - alignment of single element
840  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
841  bool VariableMask, unsigned Alignment) const;
842 
843  /// \return The cost of the interleaved memory operation.
844  /// \p Opcode is the memory operation code
845  /// \p VecTy is the vector type of the interleaved access.
846  /// \p Factor is the interleave factor
847  /// \p Indices is the indices for interleaved load members (as interleaved
848  /// load allows gaps)
849  /// \p Alignment is the alignment of the memory operation
850  /// \p AddressSpace is address space of the pointer.
851  /// \p UseMaskForCond indicates if the memory access is predicated.
852  /// \p UseMaskForGaps indicates if gaps should be masked.
853  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
854  ArrayRef<unsigned> Indices, unsigned Alignment,
855  unsigned AddressSpace,
856  bool UseMaskForCond = false,
857  bool UseMaskForGaps = false) const;
858 
859  /// Calculate the cost of performing a vector reduction.
860  ///
861  /// This is the cost of reducing the vector value of type \p Ty to a scalar
862  /// value using the operation denoted by \p Opcode. The form of the reduction
863  /// can either be a pairwise reduction or a reduction that splits the vector
864  /// at every reduction level.
865  ///
866  /// Pairwise:
867  /// (v0, v1, v2, v3)
868  /// ((v0+v1), (v2+v3), undef, undef)
869  /// Split:
870  /// (v0, v1, v2, v3)
871  /// ((v0+v2), (v1+v3), undef, undef)
872  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
873  bool IsPairwiseForm) const;
874  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
875  bool IsUnsigned) const;
876 
877  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
878  /// Three cases are handled: 1. scalar instruction 2. vector instruction
879  /// 3. scalar instruction which is to be vectorized with VF.
880  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
881  ArrayRef<Value *> Args, FastMathFlags FMF,
882  unsigned VF = 1) const;
883 
884  /// \returns The cost of Intrinsic instructions. Types analysis only.
885  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
886  /// arguments and the return value will be computed based on types.
887  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
888  ArrayRef<Type *> Tys, FastMathFlags FMF,
889  unsigned ScalarizationCostPassed = UINT_MAX) const;
890 
891  /// \returns The cost of Call instructions.
892  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
893 
894  /// \returns The number of pieces into which the provided type must be
895  /// split during legalization. Zero is returned when the answer is unknown.
896  unsigned getNumberOfParts(Type *Tp) const;
897 
898  /// \returns The cost of the address computation. For most targets this can be
899  /// merged into the instruction indexing mode. Some targets might want to
900  /// distinguish between address computation for memory operations on vector
901  /// types and scalar types. Such targets should override this function.
902  /// The 'SE' parameter holds a pointer to the scalar evolution object, which
903  /// is used to get the Ptr step value in the case of a constant stride.
904  /// The 'Ptr' parameter holds the SCEV of the access pointer.
905  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
906  const SCEV *Ptr = nullptr) const;
907 
908  /// \returns The cost, if any, of keeping values of the given types alive
909  /// over a callsite.
910  ///
911  /// Some types may require the use of register classes that do not have
912  /// any callee-saved registers, so would require a spill and fill.
913  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
914 
915  /// \returns True if the intrinsic is a supported memory intrinsic. Info
916  /// will contain additional information - whether the intrinsic may read
917  /// or write memory, its volatility, and the pointer. Info is undefined
918  /// if false is returned.
919  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
920 
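 // Example (illustrative sketch): a target recognizing one of its own
 // load-like intrinsics could fill in MemIntrinsicInfo roughly as follows;
 // MyTargetTTIImpl and Intrinsic::my_target_ld are hypothetical names.
 //
 //   bool MyTargetTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 //                                            MemIntrinsicInfo &Info) {
 //     if (Inst->getIntrinsicID() == Intrinsic::my_target_ld) {
 //       Info.PtrVal = Inst->getArgOperand(0);
 //       Info.ReadMem = true;
 //       Info.WriteMem = false;
 //       return true;
 //     }
 //     return false;
 //   }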
921  /// \returns The maximum element size, in bytes, for an element
922  /// unordered-atomic memory intrinsic.
923  unsigned getAtomicMemIntrinsicMaxElementSize() const;
924 
925  /// \returns A value which is the result of the given memory intrinsic. New
926  /// instructions may be created to extract the result from the given intrinsic
927  /// memory operation. Returns nullptr if the target cannot create a result
928  /// from the given intrinsic.
929  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
930  Type *ExpectedType) const;
931 
932  /// \returns The type to use in a loop expansion of a memcpy call.
933  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
934  unsigned SrcAlign, unsigned DestAlign) const;
935 
936  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
937  /// \param RemainingBytes The number of bytes to copy.
938  ///
939  /// Calculates the operand types to use when copying \p RemainingBytes of
940  /// memory, where source and destination alignments are \p SrcAlign and
941  /// \p DestAlign respectively.
942  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
943  LLVMContext &Context,
944  unsigned RemainingBytes,
945  unsigned SrcAlign,
946  unsigned DestAlign) const;
947 
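 // For example, with 8-byte-capable loads/stores and RemainingBytes == 7, a
 // typical implementation would append types such as { i32, i16, i8 } so that
 // 4 + 2 + 1 == 7 bytes are covered without over-reading (illustrative only;
 // the exact types chosen are target dependent).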
948  /// \returns True if the two functions have compatible attributes for inlining
949  /// purposes.
950  bool areInlineCompatible(const Function *Caller,
951  const Function *Callee) const;
952 
953  /// \returns True if the caller and callee agree on how \p Args will be passed
954  /// to the callee.
955  /// \param[out] Args The list of compatible arguments. The implementation may
956  /// filter out any incompatible args from this list.
957  bool areFunctionArgsABICompatible(const Function *Caller,
958  const Function *Callee,
959  SmallPtrSetImpl<Argument *> &Args) const;
960 
961  /// The type of load/store indexing.
962  enum MemIndexedMode {
963  MIM_Unindexed, ///< No indexing.
964  MIM_PreInc, ///< Pre-incrementing.
965  MIM_PreDec, ///< Pre-decrementing.
966  MIM_PostInc, ///< Post-incrementing.
967  MIM_PostDec ///< Post-decrementing.
968  };
969 
970  /// \returns True if the specified indexed load for the given type is legal.
971  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
972 
973  /// \returns True if the specified indexed store for the given type is legal.
974  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
975 
976  /// \returns The bitwidth of the largest vector type that should be used to
977  /// load/store in the given address space.
978  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
979 
980  /// \returns True if the load instruction is legal to vectorize.
981  bool isLegalToVectorizeLoad(LoadInst *LI) const;
982 
983  /// \returns True if the store instruction is legal to vectorize.
984  bool isLegalToVectorizeStore(StoreInst *SI) const;
985 
986  /// \returns True if it is legal to vectorize the given load chain.
987  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
988  unsigned Alignment,
989  unsigned AddrSpace) const;
990 
991  /// \returns True if it is legal to vectorize the given store chain.
992  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
993  unsigned Alignment,
994  unsigned AddrSpace) const;
995 
996  /// \returns The new vector factor value if the target doesn't support \p
997  /// SizeInBytes loads or has a better vector factor.
998  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
999  unsigned ChainSizeInBytes,
1000  VectorType *VecTy) const;
1001 
1002  /// \returns The new vector factor value if the target doesn't support \p
1003  /// SizeInBytes stores or has a better vector factor.
1004  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1005  unsigned ChainSizeInBytes,
1006  VectorType *VecTy) const;
1007 
1008  /// Flags describing the kind of vector reduction.
1009  struct ReductionFlags {
1010  ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1011  bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1012  bool IsSigned; ///< Whether the operation is a signed int reduction.
1013  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1014  };
1015 
1016  /// \returns True if the target wants to handle the given reduction idiom in
1017  /// the intrinsics form instead of the shuffle form.
1018  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1019  ReductionFlags Flags) const;
1020 
1021  /// \returns True if the target wants to expand the given reduction intrinsic
1022  /// into a shuffle sequence.
1023  bool shouldExpandReduction(const IntrinsicInst *II) const;
1024  /// @}
1025 
1026 private:
1027  /// Estimate the latency of specified instruction.
1028  /// Returns 1 as the default value.
1029  int getInstructionLatency(const Instruction *I) const;
1030 
1031  /// Returns the expected throughput cost of the instruction.
1032  /// Returns -1 if the cost is unknown.
1033  int getInstructionThroughput(const Instruction *I) const;
1034 
1035  /// The abstract base class used to type erase specific TTI
1036  /// implementations.
1037  class Concept;
1038 
1039  /// The template model for the base class which wraps a concrete
1040  /// implementation in a type erased interface.
1041  template <typename T> class Model;
1042 
1043  std::unique_ptr<Concept> TTIImpl;
1044 };
1045 
1046 class TargetTransformInfo::Concept {
1047 public:
1048  virtual ~Concept() = 0;
1049  virtual const DataLayout &getDataLayout() const = 0;
1050  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
1051  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1052  ArrayRef<const Value *> Operands) = 0;
1053  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
1054  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
1055  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
1056  virtual int getCallCost(const Function *F,
1057  ArrayRef<const Value *> Arguments, const User *U) = 0;
1058  virtual unsigned getInliningThresholdMultiplier() = 0;
1059  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1060  ArrayRef<Type *> ParamTys, const User *U) = 0;
1061  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1062  ArrayRef<const Value *> Arguments,
1063  const User *U) = 0;
1064  virtual int getMemcpyCost(const Instruction *I) = 0;
1065  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1066  unsigned &JTSize) = 0;
1067  virtual int
1068  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1069  virtual bool hasBranchDivergence() = 0;
1070  virtual bool isSourceOfDivergence(const Value *V) = 0;
1071  virtual bool isAlwaysUniform(const Value *V) = 0;
1072  virtual unsigned getFlatAddressSpace() = 0;
1073  virtual bool isLoweredToCall(const Function *F) = 0;
1074  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1075  UnrollingPreferences &UP) = 0;
1076  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1077  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1078  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1079  int64_t BaseOffset, bool HasBaseReg,
1080  int64_t Scale,
1081  unsigned AddrSpace,
1082  Instruction *I) = 0;
1083  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1084  TargetTransformInfo::LSRCost &C2) = 0;
1085  virtual bool canMacroFuseCmp() = 0;
1086  virtual bool shouldFavorPostInc() const = 0;
1087  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
1088  virtual bool isLegalMaskedStore(Type *DataType) = 0;
1089  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1090  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1091  virtual bool isLegalMaskedGather(Type *DataType) = 0;
1092  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1093  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1094  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1095  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1096  virtual bool prefersVectorizedAddressing() = 0;
1097  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1098  int64_t BaseOffset, bool HasBaseReg,
1099  int64_t Scale, unsigned AddrSpace) = 0;
1100  virtual bool LSRWithInstrQueries() = 0;
1101  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1102  virtual bool isProfitableToHoist(Instruction *I) = 0;
1103  virtual bool useAA() = 0;
1104  virtual bool isTypeLegal(Type *Ty) = 0;
1105  virtual unsigned getJumpBufAlignment() = 0;
1106  virtual unsigned getJumpBufSize() = 0;
1107  virtual bool shouldBuildLookupTables() = 0;
1108  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1109  virtual bool useColdCCForColdCall(Function &F) = 0;
1110  virtual unsigned
1111  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1112  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1113  unsigned VF) = 0;
1114  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1115  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1116  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1117  bool IsZeroCmp) const = 0;
1118  virtual bool enableInterleavedAccessVectorization() = 0;
1119  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1120  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1121  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1122  unsigned BitWidth,
1123  unsigned AddressSpace,
1124  unsigned Alignment,
1125  bool *Fast) = 0;
1126  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1127  virtual bool haveFastSqrt(Type *Ty) = 0;
1128  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1129  virtual int getFPOpCost(Type *Ty) = 0;
1130  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1131  Type *Ty) = 0;
1132  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1133  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1134  Type *Ty) = 0;
1135  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1136  Type *Ty) = 0;
1137  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1138  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1139  virtual unsigned getMinVectorRegisterBitWidth() = 0;
1140  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1141  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1142  virtual bool shouldConsiderAddressTypePromotion(
1143  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1144  virtual unsigned getCacheLineSize() = 0;
1145  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1146  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1147  virtual unsigned getPrefetchDistance() = 0;
1148  virtual unsigned getMinPrefetchStride() = 0;
1149  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1150  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1151  virtual unsigned
1152  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1153  OperandValueKind Opd2Info,
1154  OperandValueProperties Opd1PropInfo,
1155  OperandValueProperties Opd2PropInfo,
1156  ArrayRef<const Value *> Args) = 0;
1157  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1158  Type *SubTp) = 0;
1159  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1160  const Instruction *I) = 0;
1161  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1162  VectorType *VecTy, unsigned Index) = 0;
1163  virtual int getCFInstrCost(unsigned Opcode) = 0;
1164  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1165  Type *CondTy, const Instruction *I) = 0;
1166  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1167  unsigned Index) = 0;
1168  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1169  unsigned AddressSpace, const Instruction *I) = 0;
1170  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1171  unsigned Alignment,
1172  unsigned AddressSpace) = 0;
1173  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1174  Value *Ptr, bool VariableMask,
1175  unsigned Alignment) = 0;
1176  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1177  unsigned Factor,
1178  ArrayRef<unsigned> Indices,
1179  unsigned Alignment,
1180  unsigned AddressSpace,
1181  bool UseMaskForCond = false,
1182  bool UseMaskForGaps = false) = 0;
1183  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1184  bool IsPairwiseForm) = 0;
1185  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1186  bool IsPairwiseForm, bool IsUnsigned) = 0;
1187  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1188  ArrayRef<Type *> Tys, FastMathFlags FMF,
1189  unsigned ScalarizationCostPassed) = 0;
1190  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1191  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1192  virtual int getCallInstrCost(Function *F, Type *RetTy,
1193  ArrayRef<Type *> Tys) = 0;
1194  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1195  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1196  const SCEV *Ptr) = 0;
1197  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1198  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1199  MemIntrinsicInfo &Info) = 0;
1200  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1201  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1202  Type *ExpectedType) = 0;
1203  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1204  unsigned SrcAlign,
1205  unsigned DestAlign) const = 0;
1206  virtual void getMemcpyLoopResidualLoweringType(
1207  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1208  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1209  virtual bool areInlineCompatible(const Function *Caller,
1210  const Function *Callee) const = 0;
1211  virtual bool
1212  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1213  SmallPtrSetImpl<Argument *> &Args) const = 0;
1214  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1215  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1216  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1217  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1218  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1219  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1220  unsigned Alignment,
1221  unsigned AddrSpace) const = 0;
1222  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1223  unsigned Alignment,
1224  unsigned AddrSpace) const = 0;
1225  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1226  unsigned ChainSizeInBytes,
1227  VectorType *VecTy) const = 0;
1228  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1229  unsigned ChainSizeInBytes,
1230  VectorType *VecTy) const = 0;
1231  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1232  ReductionFlags) const = 0;
1233  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1234  virtual int getInstructionLatency(const Instruction *I) = 0;
1235 };
1236 
1237 template <typename T>
1238 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1239  T Impl;
1240 
1241 public:
1242  Model(T Impl) : Impl(std::move(Impl)) {}
1243  ~Model() override {}
1244 
1245  const DataLayout &getDataLayout() const override {
1246  return Impl.getDataLayout();
1247  }
1248 
1249  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1250  return Impl.getOperationCost(Opcode, Ty, OpTy);
1251  }
1252  int getGEPCost(Type *PointeeType, const Value *Ptr,
1253  ArrayRef<const Value *> Operands) override {
1254  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1255  }
1256  int getExtCost(const Instruction *I, const Value *Src) override {
1257  return Impl.getExtCost(I, Src);
1258  }
1259  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
1260  return Impl.getCallCost(FTy, NumArgs, U);
1261  }
1262  int getCallCost(const Function *F, int NumArgs, const User *U) override {
1263  return Impl.getCallCost(F, NumArgs, U);
1264  }
1265  int getCallCost(const Function *F,
1266  ArrayRef<const Value *> Arguments, const User *U) override {
1267  return Impl.getCallCost(F, Arguments, U);
1268  }
1269  unsigned getInliningThresholdMultiplier() override {
1270  return Impl.getInliningThresholdMultiplier();
1271  }
1272  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1273  ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
1274  return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
1275  }
1276  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1277  ArrayRef<const Value *> Arguments,
1278  const User *U = nullptr) override {
1279  return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
1280  }
1281  int getMemcpyCost(const Instruction *I) override {
1282  return Impl.getMemcpyCost(I);
1283  }
1284  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1285  return Impl.getUserCost(U, Operands);
1286  }
1287  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1288  bool isSourceOfDivergence(const Value *V) override {
1289  return Impl.isSourceOfDivergence(V);
1290  }
1291 
1292  bool isAlwaysUniform(const Value *V) override {
1293  return Impl.isAlwaysUniform(V);
1294  }
1295 
1296  unsigned getFlatAddressSpace() override {
1297  return Impl.getFlatAddressSpace();
1298  }
1299 
1300  bool isLoweredToCall(const Function *F) override {
1301  return Impl.isLoweredToCall(F);
1302  }
1303  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1304  UnrollingPreferences &UP) override {
1305  return Impl.getUnrollingPreferences(L, SE, UP);
1306  }
1307  bool isLegalAddImmediate(int64_t Imm) override {
1308  return Impl.isLegalAddImmediate(Imm);
1309  }
1310  bool isLegalICmpImmediate(int64_t Imm) override {
1311  return Impl.isLegalICmpImmediate(Imm);
1312  }
1313  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1314  bool HasBaseReg, int64_t Scale,
1315  unsigned AddrSpace,
1316  Instruction *I) override {
1317  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1318  Scale, AddrSpace, I);
1319  }
1320  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1321  TargetTransformInfo::LSRCost &C2) override {
1322  return Impl.isLSRCostLess(C1, C2);
1323  }
1324  bool canMacroFuseCmp() override {
1325  return Impl.canMacroFuseCmp();
1326  }
1327  bool shouldFavorPostInc() const override {
1328  return Impl.shouldFavorPostInc();
1329  }
1330  bool shouldFavorBackedgeIndex(const Loop *L) const override {
1331  return Impl.shouldFavorBackedgeIndex(L);
1332  }
1333  bool isLegalMaskedStore(Type *DataType) override {
1334  return Impl.isLegalMaskedStore(DataType);
1335  }
1336  bool isLegalMaskedLoad(Type *DataType) override {
1337  return Impl.isLegalMaskedLoad(DataType);
1338  }
1339  bool isLegalMaskedScatter(Type *DataType) override {
1340  return Impl.isLegalMaskedScatter(DataType);
1341  }
1342  bool isLegalMaskedGather(Type *DataType) override {
1343  return Impl.isLegalMaskedGather(DataType);
1344  }
1345  bool isLegalMaskedCompressStore(Type *DataType) override {
1346  return Impl.isLegalMaskedCompressStore(DataType);
1347  }
1348  bool isLegalMaskedExpandLoad(Type *DataType) override {
1349  return Impl.isLegalMaskedExpandLoad(DataType);
1350  }
1351  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1352  return Impl.hasDivRemOp(DataType, IsSigned);
1353  }
1354  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1355  return Impl.hasVolatileVariant(I, AddrSpace);
1356  }
1357  bool prefersVectorizedAddressing() override {
1358  return Impl.prefersVectorizedAddressing();
1359  }
1360  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1361  bool HasBaseReg, int64_t Scale,
1362  unsigned AddrSpace) override {
1363  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1364  Scale, AddrSpace);
1365  }
1366  bool LSRWithInstrQueries() override {
1367  return Impl.LSRWithInstrQueries();
1368  }
1369  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1370  return Impl.isTruncateFree(Ty1, Ty2);
1371  }
1372  bool isProfitableToHoist(Instruction *I) override {
1373  return Impl.isProfitableToHoist(I);
1374  }
1375  bool useAA() override { return Impl.useAA(); }
1376  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1377  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1378  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1379  bool shouldBuildLookupTables() override {
1380  return Impl.shouldBuildLookupTables();
1381  }
1382  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1383  return Impl.shouldBuildLookupTablesForConstant(C);
1384  }
1385  bool useColdCCForColdCall(Function &F) override {
1386  return Impl.useColdCCForColdCall(F);
1387  }
1388 
1389  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1390  bool Extract) override {
1391  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1392  }
1393  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1394  unsigned VF) override {
1395  return Impl.getOperandsScalarizationOverhead(Args, VF);
1396  }
1397 
1398  bool supportsEfficientVectorElementLoadStore() override {
1399  return Impl.supportsEfficientVectorElementLoadStore();
1400  }
1401 
1402  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1403  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1404  }
1405  const MemCmpExpansionOptions *enableMemCmpExpansion(
1406  bool IsZeroCmp) const override {
1407  return Impl.enableMemCmpExpansion(IsZeroCmp);
1408  }
1409  bool enableInterleavedAccessVectorization() override {
1410  return Impl.enableInterleavedAccessVectorization();
1411  }
1412  bool enableMaskedInterleavedAccessVectorization() override {
1413  return Impl.enableMaskedInterleavedAccessVectorization();
1414  }
1415  bool isFPVectorizationPotentiallyUnsafe() override {
1416  return Impl.isFPVectorizationPotentiallyUnsafe();
1417  }
1418  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1419  unsigned BitWidth, unsigned AddressSpace,
1420  unsigned Alignment, bool *Fast) override {
1421  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1422  Alignment, Fast);
1423  }
1424  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1425  return Impl.getPopcntSupport(IntTyWidthInBit);
1426  }
1427  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1428 
1429  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1430  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1431  }
1432 
1433  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1434 
1435  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1436  Type *Ty) override {
1437  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1438  }
1439  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1440  return Impl.getIntImmCost(Imm, Ty);
1441  }
1442  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1443  Type *Ty) override {
1444  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1445  }
1446  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1447  Type *Ty) override {
1448  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1449  }
1450  unsigned getNumberOfRegisters(bool Vector) override {
1451  return Impl.getNumberOfRegisters(Vector);
1452  }
1453  unsigned getRegisterBitWidth(bool Vector) const override {
1454  return Impl.getRegisterBitWidth(Vector);
1455  }
1456  unsigned getMinVectorRegisterBitWidth() override {
1457  return Impl.getMinVectorRegisterBitWidth();
1458  }
1459  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1460  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1461  }
1462  unsigned getMinimumVF(unsigned ElemWidth) const override {
1463  return Impl.getMinimumVF(ElemWidth);
1464  }
1465  bool shouldConsiderAddressTypePromotion(
1466  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1467  return Impl.shouldConsiderAddressTypePromotion(
1468  I, AllowPromotionWithoutCommonHeader);
1469  }
1470  unsigned getCacheLineSize() override {
1471  return Impl.getCacheLineSize();
1472  }
1473  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1474  return Impl.getCacheSize(Level);
1475  }
1476  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1477  return Impl.getCacheAssociativity(Level);
1478  }
1479  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1480  unsigned getMinPrefetchStride() override {
1481  return Impl.getMinPrefetchStride();
1482  }
1483  unsigned getMaxPrefetchIterationsAhead() override {
1484  return Impl.getMaxPrefetchIterationsAhead();
1485  }
1486  unsigned getMaxInterleaveFactor(unsigned VF) override {
1487  return Impl.getMaxInterleaveFactor(VF);
1488  }
1489  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1490  unsigned &JTSize) override {
1491  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1492  }
1493  unsigned
1494  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1495  OperandValueKind Opd2Info,
1496  OperandValueProperties Opd1PropInfo,
1497  OperandValueProperties Opd2PropInfo,
1498  ArrayRef<const Value *> Args) override {
1499  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1500  Opd1PropInfo, Opd2PropInfo, Args);
1501  }
1502  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1503  Type *SubTp) override {
1504  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1505  }
1506  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1507  const Instruction *I) override {
1508  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1509  }
1510  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1511  unsigned Index) override {
1512  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1513  }
1514  int getCFInstrCost(unsigned Opcode) override {
1515  return Impl.getCFInstrCost(Opcode);
1516  }
1517  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1518  const Instruction *I) override {
1519  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1520  }
1521  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1522  return Impl.getVectorInstrCost(Opcode, Val, Index);
1523  }
1524  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1525  unsigned AddressSpace, const Instruction *I) override {
1526  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1527  }
1528  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1529  unsigned AddressSpace) override {
1530  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1531  }
1532  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1533  Value *Ptr, bool VariableMask,
1534  unsigned Alignment) override {
1535  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1536  Alignment);
1537  }
1538  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1539  ArrayRef<unsigned> Indices, unsigned Alignment,
1540  unsigned AddressSpace, bool UseMaskForCond,
1541  bool UseMaskForGaps) override {
1542  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1543  Alignment, AddressSpace,
1544  UseMaskForCond, UseMaskForGaps);
1545  }
1546  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1547  bool IsPairwiseForm) override {
1548  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1549  }
1550  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1551  bool IsPairwiseForm, bool IsUnsigned) override {
1552  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1553  }
1554  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1555  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1556  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1557  ScalarizationCostPassed);
1558  }
1559  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1560  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1561  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1562  }
1563  int getCallInstrCost(Function *F, Type *RetTy,
1564  ArrayRef<Type *> Tys) override {
1565  return Impl.getCallInstrCost(F, RetTy, Tys);
1566  }
1567  unsigned getNumberOfParts(Type *Tp) override {
1568  return Impl.getNumberOfParts(Tp);
1569  }
1570  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1571  const SCEV *Ptr) override {
1572  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1573  }
1574  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1575  return Impl.getCostOfKeepingLiveOverCall(Tys);
1576  }
1577  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1578  MemIntrinsicInfo &Info) override {
1579  return Impl.getTgtMemIntrinsic(Inst, Info);
1580  }
1581  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1582  return Impl.getAtomicMemIntrinsicMaxElementSize();
1583  }
1584  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1585  Type *ExpectedType) override {
1586  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1587  }
1588  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1589  unsigned SrcAlign,
1590  unsigned DestAlign) const override {
1591  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1592  }
1593  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1594  LLVMContext &Context,
1595  unsigned RemainingBytes,
1596  unsigned SrcAlign,
1597  unsigned DestAlign) const override {
1598  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1599  SrcAlign, DestAlign);
1600  }
1601  bool areInlineCompatible(const Function *Caller,
1602  const Function *Callee) const override {
1603  return Impl.areInlineCompatible(Caller, Callee);
1604  }
1605  bool areFunctionArgsABICompatible(
1606  const Function *Caller, const Function *Callee,
1607  SmallPtrSetImpl<Argument *> &Args) const override {
1608  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1609  }
1610  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1611  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1612  }
1613  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1614  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1615  }
1616  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1617  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1618  }
1619  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1620  return Impl.isLegalToVectorizeLoad(LI);
1621  }
1622  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1623  return Impl.isLegalToVectorizeStore(SI);
1624  }
1625  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1626  unsigned Alignment,
1627  unsigned AddrSpace) const override {
1628  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1629  AddrSpace);
1630  }
1631  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1632  unsigned Alignment,
1633  unsigned AddrSpace) const override {
1634  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1635  AddrSpace);
1636  }
1637  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1638  unsigned ChainSizeInBytes,
1639  VectorType *VecTy) const override {
1640  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1641  }
1642  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1643  unsigned ChainSizeInBytes,
1644  VectorType *VecTy) const override {
1645  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1646  }
1647  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1648  ReductionFlags Flags) const override {
1649  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1650  }
1651  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1652  return Impl.shouldExpandReduction(II);
1653  }
1654  int getInstructionLatency(const Instruction *I) override {
1655  return Impl.getInstructionLatency(I);
1656  }
1657 };
1658 
1659 template <typename T>
1660 TargetTransformInfo::TargetTransformInfo(T Impl)
1661  : TTIImpl(new Model<T>(Impl)) {}
1662 
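(Illustrative aside, not part of TargetTransformInfo.h.) The templated constructor above performs the type erasure: whatever Impl object a target supplies is wrapped in Model<T>, and every public query on TargetTransformInfo then dispatches through the Concept vtable to that implementation. A minimal sketch, assuming only the interfaces declared in this header; the helper name makeBaselineTTI is hypothetical:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

// Build a conservative, target-independent TTI from a DataLayout.  A real
// target instead passes its own Impl object to the templated constructor,
// which wraps it in Model<T> so every query forwards to that implementation.
TargetTransformInfo makeBaselineTTI(const DataLayout &DL) {
  return TargetTransformInfo(DL);
}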
1663 /// Analysis pass providing the \c TargetTransformInfo.
1664 ///
1665 /// The core idea of the TargetIRAnalysis is to expose an interface through
1666 /// which LLVM targets can analyze and provide information about the middle
1667 /// end's target-independent IR. This supports use cases such as target-aware
1668 /// cost modeling of IR constructs.
1669 ///
1670 /// This is a function analysis because much of the cost modeling for targets
1671 /// is done in a subtarget-specific way, and LLVM supports compiling different
1672 /// functions targeting different subtargets in order to allow runtime
1673 /// dispatch according to the observed subtarget.
1674 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1675 public:
1676  typedef TargetTransformInfo Result;
1677 
1678  /// Default construct a target IR analysis.
1679  ///
1680  /// This will use the module's datalayout to construct a baseline
1681  /// conservative TTI result.
1682  TargetIRAnalysis();
1683 
1684  /// Construct an IR analysis pass around a target-provided callback.
1685  ///
1686  /// The callback will be called with a particular function for which the TTI
1687  /// is needed and must return a TTI object for that function.
1688  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1689 
1690  // Value semantics. We spell out the constructors for MSVC.
1691  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1692  : TTICallback(Arg.TTICallback) {}
1693  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1694  : TTICallback(std::move(Arg.TTICallback)) {}
1695  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1696  TTICallback = RHS.TTICallback;
1697  return *this;
1698  }
1699  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1700  TTICallback = std::move(RHS.TTICallback);
1701  return *this;
1702  }
1703 
1704  Result run(const Function &F, FunctionAnalysisManager &);
1705 
1706 private:
1707  friend AnalysisInfoMixin<TargetIRAnalysis>;
1708  static AnalysisKey Key;
1709 
1710  /// The callback used to produce a result.
1711  ///
1712  /// We use a completely opaque callback so that targets can provide whatever
1713  /// mechanism they desire for constructing the TTI for a given function.
1714  ///
1715  /// FIXME: Should we really use std::function? It's relatively inefficient.
1716  /// It might be possible to arrange for even stateful callbacks to outlive
1717  /// the analysis and thus use a function_ref which would be lighter weight.
1718  /// This may also be less error prone as the callback is likely to reference
1719  /// the external TargetMachine, and that reference needs to never dangle.
1720  std::function<Result(const Function &)> TTICallback;
1721 
1722  /// Helper function used as the callback in the default constructor.
1723  static Result getDefaultTTI(const Function &F);
1724 };
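(Illustrative aside, not part of TargetTransformInfo.h.) A sketch of how the analysis above is typically wired into the new pass manager. The callback forwarding to TargetMachine::getTargetTransformInfo and the helper name getTTIFor are assumptions for illustration only:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

// Register a target-aware TargetIRAnalysis, then query the per-function TTI.
// 'TM' is assumed to be a live TargetMachine owned by the caller and to
// outlive the analysis manager.
const TargetTransformInfo &getTTIFor(Function &F, TargetMachine &TM,
                                     FunctionAnalysisManager &FAM) {
  FAM.registerPass([&TM] {
    return TargetIRAnalysis(
        [&TM](const Function &Fn) { return TM.getTargetTransformInfo(Fn); });
  });
  return FAM.getResult<TargetIRAnalysis>(F);
}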
1725 
1726 /// Wrapper pass for TargetTransformInfo.
1727 ///
1728 /// This pass can be constructed from a TTI object, which it stores internally
1729 /// and makes available to passes that query it.
1730 class TargetTransformInfoWrapperPass : public ImmutablePass {
1731  TargetIRAnalysis TIRA;
1732  Optional<TargetTransformInfo> TTI;
1733 
1734  virtual void anchor();
1735 
1736 public:
1737  static char ID;
1738 
1739  /// We must provide a default constructor for the pass but it should
1740  /// never be used.
1741  ///
1742  /// Use the constructor below or call one of the creation routines.
1743  TargetTransformInfoWrapperPass();
1744 
1745  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1746 
1747  TargetTransformInfo &getTTI(const Function &F);
1748 };
1749 
1750 /// Create an analysis pass wrapper around a TTI object.
1751 ///
1752 /// This analysis pass just holds the TTI instance and makes it available to
1753 /// clients.
1754 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1755 
1756 } // End llvm namespace
1757 
1758 #endif
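(Illustrative aside, not part of TargetTransformInfo.h.) Under the legacy pass manager, the wrapper pass declared above is the usual way to reach a per-function TTI. ExampleLegacyPass is a hypothetical client written only to show the pattern; real passes also need the usual pass registration boilerplate:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Hypothetical client: declare the dependency, then pull a per-function TTI
// out of the wrapper pass and issue a query against it.
struct ExampleLegacyPass : public FunctionPass {
  static char ID;
  ExampleLegacyPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    const TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    // Example query: the target's vector register width in bits.
    (void)TTI.getRegisterBitWidth(/*Vector=*/true);
    return false; // read-only; the IR is not modified
  }
};
} // end anonymous namespace

char ExampleLegacyPass::ID = 0;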