LLVM 9.0.0svn
TargetTransformInfo.h
//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
enum ID : unsigned;
}

class AssumptionCache;
class BranchInst;
class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class Loop;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) && !IsVolatile;
  }
};

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of the instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }
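
  // Example (an illustrative sketch only; `TTI` and `BB` are assumed to be
  // provided by the caller and are not part of this interface): summing the
  // reciprocal-throughput cost of every instruction in a basic block.
  //
  //   int Cost = 0;
  //   for (const Instruction &I : BB)
  //     Cost += TTI.getInstructionCost(
  //         &I, TargetTransformInfo::TCK_RecipThroughput);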

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };
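
  // As an illustrative sketch only (assuming a `TTI` object and an
  // instruction `I` supplied by the caller), a client can compare a returned
  // cost against these constants before summing it into a larger estimate:
  //
  //   if (TTI.getUserCost(&I) == TargetTransformInfo::TCC_Free)
  //     ; // expected to fold away in lowering; contributes nothing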

  /// Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1,
                  const User *U = nullptr) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1,
                  const User *U = nullptr) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
                  const User *U = nullptr) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys,
                       const User *U = nullptr) const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments,
                       const User *U = nullptr) const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  int getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
  /// table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must be equal
  /// to the number of current operands the IR user has, and their order on the
  /// list must match the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }
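
  // Illustrative sketch (the names `GEP`, `NewOps`, and `IdxTy` are
  // assumptions of this example): the two-argument form lets a transformation
  // ask what a user would cost if its operands were replaced, e.g. after a
  // speculative fold, without mutating the IR first.
  //
  //   SmallVector<const Value *, 4> NewOps(GEP->value_op_begin(),
  //                                        GEP->value_op_end());
  //   NewOps[1] = ConstantInt::get(IdxTy, 0); // hypothetical folded index
  //   int Cost = TTI.getUserCost(GEP, NewOps);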

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// Returns true for the target-specific set of operations that produce a
  /// uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents the number of instructions optimized away when a "back edge"
    // becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations for loops with low dynamic trip count.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
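
  // A minimal sketch of how a target implementation might adjust these
  // preferences (`MyTTIImpl` and the concrete values are illustrative only,
  // not taken from any in-tree target):
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           UnrollingPreferences &UP) {
  //     UP.Threshold = 300; // tolerate larger unrolled bodies
  //     UP.Partial = true;  // allow partial unrolling
  //     UP.Runtime = true;  // allow runtime unrolling
  //     UP.MaxCount = 4;    // but never unroll by more than 4
  //   }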

  /// Attributes of a target dependent hardware loop. Here, the term 'element'
  /// describes the work performed by an IR loop that has not been vectorized
  /// by the compiler.
  struct HardwareLoopInfo {
    HardwareLoopInfo() = delete;
    HardwareLoopInfo(Loop *L) : L(L) {}
    Loop *L = nullptr;
    BasicBlock *ExitBlock = nullptr;
    BranchInst *ExitBranch = nullptr;
    const SCEV *ExitCount = nullptr;
    IntegerType *CountType = nullptr;
    Value *LoopDecrement = nullptr; // The maximum number of elements
                                    // processed in the loop body.
    bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                    // another hardware loop?
    bool CounterInReg = false;      // Should the loop counter be updated in
                                    // the loop via a phi?
  };

  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;
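
  // Illustrative sketch (assuming `TTI`, `L`, `SE`, `AC` and `LibInfo` are
  // available in the caller): a pass that forms hardware loops would query
  // the target before rewriting the loop.
  //
  //   HardwareLoopInfo HWLoopInfo(L);
  //   if (TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo)) {
  //     // HWLoopInfo.CountType and HWLoopInfo.ExitCount describe the trip
  //     // count the transformed loop is expected to use.
  //   }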

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
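
  // Illustrative sketch (assuming `TTI` and a load's value type `AccessTy`):
  // asking whether a "base register + constant offset 16" mode with no scaled
  // index is legal for this target.
  //
  //   bool Legal = TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/0);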

  /// Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// Return true if LSR should make efforts to generate indexed addressing
  /// modes that operate across loop iterations.
  bool shouldFavorBackedgeIndex(const Loop *L) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist an instruction in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;
    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
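
  // A minimal sketch of a target-side implementation (`MyTTIImpl` and the
  // 8/4/2/1 load sizes are illustrative, not taken from any particular
  // target):
  //
  //   const MemCmpExpansionOptions *
  //   MyTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
  //     static MemCmpExpansionOptions Options;
  //     Options.LoadSizes = {8, 4, 2, 1};     // decreasing order, in bytes
  //     Options.AllowOverlappingLoads = true; // e.g. two 4-byte loads for a
  //                                           // 7-byte compare
  //     return &Options;
  //   }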

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARMv7 NEON SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// Return the expected cost for the given integer when optimising
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total costs for a constant are calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  unsigned getMinimumVF(unsigned ElemWidth) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How much before a load we should place the prefetch instruction.
  /// This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride. This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead. If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  static OperandValueKind getOperandInfo(Value *V,
                                         OperandValueProperties &OpProps);

  /// This is an approximation of reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<Value *> Args = ArrayRef<Value *>()) const;
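
  // Illustrative sketch (assuming `TTI` and a vector type `VecTy`): querying
  // the reciprocal-throughput cost of a vector add whose second operand is
  // known to be a uniform (splat) constant.
  //
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Add, VecTy, TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue);
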
  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds to show the insert/extract point and the type of
  /// the subvector being inserted/extracted.
  /// NOTE: For subvector extractions Tp represents the source type.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                         Type *CondTy = nullptr,
                         const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace,
                      const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - the type of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  /// \p UseMaskForCond indicates if the memory access is predicated.
  /// \p UseMaskForGaps indicates if gaps should be masked.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false) const;

  /// Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) const;
  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  /// Three cases are handled: 1. scalar instruction 2. vector instruction
  /// 3. scalar instruction which is to be vectorized with VF.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1) const;

  /// \returns The cost of Intrinsic instructions. Types analysis only.
  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
  /// arguments and the return value will be computed based on types.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX) const;

  /// \returns The cost of Call instructions.
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can
  /// be merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the scalar evolution object which
  /// is used in order to get the Ptr step value in case of constant stride.
  /// The 'Ptr' parameter holds the SCEV of the access pointer.
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
                                const SCEV *Ptr = nullptr) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may write
  /// or read to memory, volatility and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
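
  // Illustrative sketch (assuming `TTI` and an `IntrinsicInst *II`): a memory
  // analysis could treat a recognized target intrinsic like an ordinary
  // load/store through Info.PtrVal.
  //
  //   MemIntrinsicInfo Info;
  //   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal &&
  //       Info.isUnordered()) {
  //     // Reason about the intrinsic as a plain access to Info.PtrVal
  //     // (a read if Info.ReadMem, a write if Info.WriteMem).
  //   }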

  /// \returns The maximum element size, in bytes, for an element
  /// unordered-atomic memory intrinsic.
  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given intrinsic
  /// memory operation. Returns nullptr if the target cannot create a result
  /// from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns The type to use in a loop expansion of a memcpy call.
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  /// \param RemainingBytes The number of bytes to copy.
  ///
  /// Calculates the operand types to use when copying \p RemainingBytes of
  /// memory, where source and destination alignments are \p SrcAlign and
  /// \p DestAlign respectively.
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
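
  // Illustrative sketch (assuming `TTI`, a context `Ctx`, a memcpy length
  // value `Len`, and source/destination alignments of 4): expanding a memcpy
  // into a loop plus a residual copy of the leftover bytes.
  //
  //   Type *OpTy = TTI.getMemcpyLoopLoweringType(Ctx, Len, /*SrcAlign=*/4,
  //                                              /*DestAlign=*/4);
  //   SmallVector<Type *, 4> ResidualTys;
  //   TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
  //                                         /*RemainingBytes=*/7,
  //                                         /*SrcAlign=*/4, /*DestAlign=*/4);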

  /// \returns True if the two functions have compatible attributes for inlining
  /// purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \returns True if the caller and callee agree on how \p Args will be passed
  /// to the callee.
  /// \param[out] Args The list of compatible arguments. The implementation may
  /// filter out any incompatible args from this list.
  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const;

  /// The type of load/store indexing.
  enum MemIndexedMode {
    MIM_Unindexed, ///< No indexing.
    MIM_PreInc,    ///< Pre-incrementing.
    MIM_PreDec,    ///< Pre-decrementing.
    MIM_PostInc,   ///< Post-incrementing.
    MIM_PostDec    ///< Post-decrementing.
  };

  /// \returns True if the specified indexed load for the given type is legal.
  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns True if the specified indexed store for the given type is legal.
  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \returns True if the load instruction is legal to vectorize.
  bool isLegalToVectorizeLoad(LoadInst *LI) const;

  /// \returns True if the store instruction is legal to vectorize.
  bool isLegalToVectorizeStore(StoreInst *SI) const;

  /// \returns True if it is legal to vectorize the given load chain.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given store chain.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes loads or has a better vector factor.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes stores or has a better vector factor.
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;

  /// Flags describing the kind of vector reduction.
  struct ReductionFlags {
    ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
    bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
    bool IsSigned; ///< Whether the operation is a signed int reduction.
    bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
  };

  /// \returns True if the target wants to handle the given reduction idiom in
  /// the intrinsics form instead of the shuffle form.
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const;

  /// \returns True if the target wants to expand the given reduction intrinsic
  /// into a shuffle sequence.
  bool shouldExpandReduction(const IntrinsicInst *II) const;
  /// @}

private:
  /// Estimate the latency of the specified instruction.
  /// Returns 1 as the default value.
  int getInstructionLatency(const Instruction *I) const;

  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  int getInstructionThroughput(const Instruction *I) const;

  /// The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};
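
// The Concept/Model pair below is a standard type-erasure idiom: Concept is an
// abstract interface mirroring the public API above, and Model<T> forwards
// every call to a concrete target implementation T. A minimal sketch of the
// pattern in isolation (PrintableConcept/PrintableModel are hypothetical names
// used only for this example):
//
//   class PrintableConcept {
//   public:
//     virtual ~PrintableConcept() = default;
//     virtual void print() const = 0;
//   };
//   template <typename T>
//   class PrintableModel final : public PrintableConcept {
//     T Impl;
//   public:
//     PrintableModel(T Impl) : Impl(std::move(Impl)) {}
//     void print() const override { Impl.print(); }
//   };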
class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments, const User *U) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys, const User *U) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments,
                               const User *U) = 0;
  virtual int getMemcpyCost(const Instruction *I) = 0;
  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                    unsigned &JTSize) = 0;
  virtual int
  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP) = 0;
  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                             TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool canMacroFuseCmp() = 0;
1119  virtual bool shouldFavorPostInc() const = 0;
1120  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
1121  virtual bool isLegalMaskedStore(Type *DataType) = 0;
1122  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1123  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1124  virtual bool isLegalMaskedGather(Type *DataType) = 0;
1125  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1126  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1127  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1128  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1129  virtual bool prefersVectorizedAddressing() = 0;
1130  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1131  int64_t BaseOffset, bool HasBaseReg,
1132  int64_t Scale, unsigned AddrSpace) = 0;
1133  virtual bool LSRWithInstrQueries() = 0;
1134  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1135  virtual bool isProfitableToHoist(Instruction *I) = 0;
1136  virtual bool useAA() = 0;
1137  virtual bool isTypeLegal(Type *Ty) = 0;
1138  virtual unsigned getJumpBufAlignment() = 0;
1139  virtual unsigned getJumpBufSize() = 0;
1140  virtual bool shouldBuildLookupTables() = 0;
1141  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1142  virtual bool useColdCCForColdCall(Function &F) = 0;
1143  virtual unsigned
1144  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1145  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1146  unsigned VF) = 0;
1147  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1148  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1149  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1150  bool IsZeroCmp) const = 0;
1151  virtual bool enableInterleavedAccessVectorization() = 0;
1152  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1153  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1154  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1155  unsigned BitWidth,
1156  unsigned AddressSpace,
1157  unsigned Alignment,
1158  bool *Fast) = 0;
1159  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1160  virtual bool haveFastSqrt(Type *Ty) = 0;
1161  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1162  virtual int getFPOpCost(Type *Ty) = 0;
1163  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1164  Type *Ty) = 0;
1165  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1166  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1167  Type *Ty) = 0;
1168  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1169  Type *Ty) = 0;
1170  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1171  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1172  virtual unsigned getMinVectorRegisterBitWidth() = 0;
1173  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1174  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1175  virtual bool shouldConsiderAddressTypePromotion(
1176  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1177  virtual unsigned getCacheLineSize() = 0;
1178  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1179  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1180  virtual unsigned getPrefetchDistance() = 0;
1181  virtual unsigned getMinPrefetchStride() = 0;
1182  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1183  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1184  virtual unsigned
1185  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1186  OperandValueKind Opd2Info,
1187  OperandValueProperties Opd1PropInfo,
1188  OperandValueProperties Opd2PropInfo,
1189  ArrayRef<const Value *> Args) = 0;
1190  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1191  Type *SubTp) = 0;
1192  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1193  const Instruction *I) = 0;
1194  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1195  VectorType *VecTy, unsigned Index) = 0;
1196  virtual int getCFInstrCost(unsigned Opcode) = 0;
1197  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1198  Type *CondTy, const Instruction *I) = 0;
1199  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1200  unsigned Index) = 0;
1201  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1202  unsigned AddressSpace, const Instruction *I) = 0;
1203  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1204  unsigned Alignment,
1205  unsigned AddressSpace) = 0;
1206  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1207  Value *Ptr, bool VariableMask,
1208  unsigned Alignment) = 0;
1209  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1210  unsigned Factor,
1211  ArrayRef<unsigned> Indices,
1212  unsigned Alignment,
1213  unsigned AddressSpace,
1214  bool UseMaskForCond = false,
1215  bool UseMaskForGaps = false) = 0;
1216  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1217  bool IsPairwiseForm) = 0;
1218  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1219  bool IsPairwiseForm, bool IsUnsigned) = 0;
1220  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1221  ArrayRef<Type *> Tys, FastMathFlags FMF,
1222  unsigned ScalarizationCostPassed) = 0;
1223  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1224  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1225  virtual int getCallInstrCost(Function *F, Type *RetTy,
1226  ArrayRef<Type *> Tys) = 0;
1227  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1228  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1229  const SCEV *Ptr) = 0;
1230  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1231  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1232  MemIntrinsicInfo &Info) = 0;
1233  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1234  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1235  Type *ExpectedType) = 0;
1236  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1237  unsigned SrcAlign,
1238  unsigned DestAlign) const = 0;
1239  virtual void getMemcpyLoopResidualLoweringType(
1240  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1241  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1242  virtual bool areInlineCompatible(const Function *Caller,
1243  const Function *Callee) const = 0;
1244  virtual bool
1245  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1246  SmallPtrSetImpl<Argument *> &Args) const = 0;
1247  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1248  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1249  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1250  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1251  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1252  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1253  unsigned Alignment,
1254  unsigned AddrSpace) const = 0;
1255  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1256  unsigned Alignment,
1257  unsigned AddrSpace) const = 0;
1258  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1259  unsigned ChainSizeInBytes,
1260  VectorType *VecTy) const = 0;
1261  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1262  unsigned ChainSizeInBytes,
1263  VectorType *VecTy) const = 0;
1264  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1265  ReductionFlags) const = 0;
1266  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1267  virtual int getInstructionLatency(const Instruction *I) = 0;
1268 };
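The pure virtual methods above define the full query surface a target must answer; IR-level passes never call them directly but go through the public TargetTransformInfo wrapper, which forwards each query through this Concept/Model pair. A minimal pass-side sketch follows; the helper name widerVectorsLookProfitable and the 128-bit threshold are illustrative assumptions, not part of this header.

// Sketch only: the TargetTransformInfo reference is assumed to come from
// TargetIRAnalysis (declared later in this file).
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical helper: asks the target, via the hooks listed above, whether
// wide vector registers are available at all.
static bool widerVectorsLookProfitable(const TargetTransformInfo &TTI) {
  unsigned VecBits = TTI.getRegisterBitWidth(/*Vector=*/true);
  unsigned VecRegs = TTI.getNumberOfRegisters(/*Vector=*/true);
  return VecRegs > 0 && VecBits >= 128; // 128-bit threshold is illustrative.
}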
1269 
1270 template <typename T>
1271 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1272  T Impl;
1273 
1274 public:
1275  Model(T Impl) : Impl(std::move(Impl)) {}
1276  ~Model() override {}
1277 
1278  const DataLayout &getDataLayout() const override {
1279  return Impl.getDataLayout();
1280  }
1281 
1282  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1283  return Impl.getOperationCost(Opcode, Ty, OpTy);
1284  }
1285  int getGEPCost(Type *PointeeType, const Value *Ptr,
1286  ArrayRef<const Value *> Operands) override {
1287  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1288  }
1289  int getExtCost(const Instruction *I, const Value *Src) override {
1290  return Impl.getExtCost(I, Src);
1291  }
1292  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
1293  return Impl.getCallCost(FTy, NumArgs, U);
1294  }
1295  int getCallCost(const Function *F, int NumArgs, const User *U) override {
1296  return Impl.getCallCost(F, NumArgs, U);
1297  }
1298  int getCallCost(const Function *F,
1299  ArrayRef<const Value *> Arguments, const User *U) override {
1300  return Impl.getCallCost(F, Arguments, U);
1301  }
1302  unsigned getInliningThresholdMultiplier() override {
1303  return Impl.getInliningThresholdMultiplier();
1304  }
1305  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1306  ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
1307  return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
1308  }
1309  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1310  ArrayRef<const Value *> Arguments,
1311  const User *U = nullptr) override {
1312  return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
1313  }
1314  int getMemcpyCost(const Instruction *I) override {
1315  return Impl.getMemcpyCost(I);
1316  }
1317  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1318  return Impl.getUserCost(U, Operands);
1319  }
1320  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1321  bool isSourceOfDivergence(const Value *V) override {
1322  return Impl.isSourceOfDivergence(V);
1323  }
1324 
1325  bool isAlwaysUniform(const Value *V) override {
1326  return Impl.isAlwaysUniform(V);
1327  }
1328 
1329  unsigned getFlatAddressSpace() override {
1330  return Impl.getFlatAddressSpace();
1331  }
1332 
1333  bool isLoweredToCall(const Function *F) override {
1334  return Impl.isLoweredToCall(F);
1335  }
1336  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1337  UnrollingPreferences &UP) override {
1338  return Impl.getUnrollingPreferences(L, SE, UP);
1339  }
1340  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1341  AssumptionCache &AC,
1342  TargetLibraryInfo *LibInfo,
1343  HardwareLoopInfo &HWLoopInfo) override {
1344  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1345  }
1346  bool isLegalAddImmediate(int64_t Imm) override {
1347  return Impl.isLegalAddImmediate(Imm);
1348  }
1349  bool isLegalICmpImmediate(int64_t Imm) override {
1350  return Impl.isLegalICmpImmediate(Imm);
1351  }
1352  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1353  bool HasBaseReg, int64_t Scale,
1354  unsigned AddrSpace,
1355  Instruction *I) override {
1356  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1357  Scale, AddrSpace, I);
1358  }
1359  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1360  TargetTransformInfo::LSRCost &C2) override {
1361  return Impl.isLSRCostLess(C1, C2);
1362  }
1363  bool canMacroFuseCmp() override {
1364  return Impl.canMacroFuseCmp();
1365  }
1366  bool shouldFavorPostInc() const override {
1367  return Impl.shouldFavorPostInc();
1368  }
1369  bool shouldFavorBackedgeIndex(const Loop *L) const override {
1370  return Impl.shouldFavorBackedgeIndex(L);
1371  }
1372  bool isLegalMaskedStore(Type *DataType) override {
1373  return Impl.isLegalMaskedStore(DataType);
1374  }
1375  bool isLegalMaskedLoad(Type *DataType) override {
1376  return Impl.isLegalMaskedLoad(DataType);
1377  }
1378  bool isLegalMaskedScatter(Type *DataType) override {
1379  return Impl.isLegalMaskedScatter(DataType);
1380  }
1381  bool isLegalMaskedGather(Type *DataType) override {
1382  return Impl.isLegalMaskedGather(DataType);
1383  }
1384  bool isLegalMaskedCompressStore(Type *DataType) override {
1385  return Impl.isLegalMaskedCompressStore(DataType);
1386  }
1387  bool isLegalMaskedExpandLoad(Type *DataType) override {
1388  return Impl.isLegalMaskedExpandLoad(DataType);
1389  }
1390  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1391  return Impl.hasDivRemOp(DataType, IsSigned);
1392  }
1393  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1394  return Impl.hasVolatileVariant(I, AddrSpace);
1395  }
1396  bool prefersVectorizedAddressing() override {
1397  return Impl.prefersVectorizedAddressing();
1398  }
1399  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1400  bool HasBaseReg, int64_t Scale,
1401  unsigned AddrSpace) override {
1402  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1403  Scale, AddrSpace);
1404  }
1405  bool LSRWithInstrQueries() override {
1406  return Impl.LSRWithInstrQueries();
1407  }
1408  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1409  return Impl.isTruncateFree(Ty1, Ty2);
1410  }
1411  bool isProfitableToHoist(Instruction *I) override {
1412  return Impl.isProfitableToHoist(I);
1413  }
1414  bool useAA() override { return Impl.useAA(); }
1415  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1416  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1417  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1418  bool shouldBuildLookupTables() override {
1419  return Impl.shouldBuildLookupTables();
1420  }
1421  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1422  return Impl.shouldBuildLookupTablesForConstant(C);
1423  }
1424  bool useColdCCForColdCall(Function &F) override {
1425  return Impl.useColdCCForColdCall(F);
1426  }
1427 
1428  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1429  bool Extract) override {
1430  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1431  }
1432  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1433  unsigned VF) override {
1434  return Impl.getOperandsScalarizationOverhead(Args, VF);
1435  }
1436 
1437  bool supportsEfficientVectorElementLoadStore() override {
1438  return Impl.supportsEfficientVectorElementLoadStore();
1439  }
1440 
1441  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1442  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1443  }
1444  const MemCmpExpansionOptions *enableMemCmpExpansion(
1445  bool IsZeroCmp) const override {
1446  return Impl.enableMemCmpExpansion(IsZeroCmp);
1447  }
1448  bool enableInterleavedAccessVectorization() override {
1449  return Impl.enableInterleavedAccessVectorization();
1450  }
1451  bool enableMaskedInterleavedAccessVectorization() override {
1452  return Impl.enableMaskedInterleavedAccessVectorization();
1453  }
1454  bool isFPVectorizationPotentiallyUnsafe() override {
1455  return Impl.isFPVectorizationPotentiallyUnsafe();
1456  }
1457  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1458  unsigned BitWidth, unsigned AddressSpace,
1459  unsigned Alignment, bool *Fast) override {
1460  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1461  Alignment, Fast);
1462  }
1463  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1464  return Impl.getPopcntSupport(IntTyWidthInBit);
1465  }
1466  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1467 
1468  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1469  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1470  }
1471 
1472  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1473 
1474  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1475  Type *Ty) override {
1476  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1477  }
1478  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1479  return Impl.getIntImmCost(Imm, Ty);
1480  }
1481  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1482  Type *Ty) override {
1483  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1484  }
1485  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1486  Type *Ty) override {
1487  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1488  }
1489  unsigned getNumberOfRegisters(bool Vector) override {
1490  return Impl.getNumberOfRegisters(Vector);
1491  }
1492  unsigned getRegisterBitWidth(bool Vector) const override {
1493  return Impl.getRegisterBitWidth(Vector);
1494  }
1495  unsigned getMinVectorRegisterBitWidth() override {
1496  return Impl.getMinVectorRegisterBitWidth();
1497  }
1498  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1499  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1500  }
1501  unsigned getMinimumVF(unsigned ElemWidth) const override {
1502  return Impl.getMinimumVF(ElemWidth);
1503  }
1504  bool shouldConsiderAddressTypePromotion(
1505  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1506  return Impl.shouldConsiderAddressTypePromotion(
1507  I, AllowPromotionWithoutCommonHeader);
1508  }
1509  unsigned getCacheLineSize() override {
1510  return Impl.getCacheLineSize();
1511  }
1512  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1513  return Impl.getCacheSize(Level);
1514  }
1515  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1516  return Impl.getCacheAssociativity(Level);
1517  }
1518  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1519  unsigned getMinPrefetchStride() override {
1520  return Impl.getMinPrefetchStride();
1521  }
1522  unsigned getMaxPrefetchIterationsAhead() override {
1523  return Impl.getMaxPrefetchIterationsAhead();
1524  }
1525  unsigned getMaxInterleaveFactor(unsigned VF) override {
1526  return Impl.getMaxInterleaveFactor(VF);
1527  }
1528  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1529  unsigned &JTSize) override {
1530  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1531  }
1532  unsigned
1533  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1534  OperandValueKind Opd2Info,
1535  OperandValueProperties Opd1PropInfo,
1536  OperandValueProperties Opd2PropInfo,
1537  ArrayRef<const Value *> Args) override {
1538  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1539  Opd1PropInfo, Opd2PropInfo, Args);
1540  }
1541  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1542  Type *SubTp) override {
1543  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1544  }
1545  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1546  const Instruction *I) override {
1547  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1548  }
1549  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1550  unsigned Index) override {
1551  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1552  }
1553  int getCFInstrCost(unsigned Opcode) override {
1554  return Impl.getCFInstrCost(Opcode);
1555  }
1556  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1557  const Instruction *I) override {
1558  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1559  }
1560  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1561  return Impl.getVectorInstrCost(Opcode, Val, Index);
1562  }
1563  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1564  unsigned AddressSpace, const Instruction *I) override {
1565  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1566  }
1567  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1568  unsigned AddressSpace) override {
1569  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1570  }
1571  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1572  Value *Ptr, bool VariableMask,
1573  unsigned Alignment) override {
1574  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1575  Alignment);
1576  }
1577  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1578  ArrayRef<unsigned> Indices, unsigned Alignment,
1579  unsigned AddressSpace, bool UseMaskForCond,
1580  bool UseMaskForGaps) override {
1581  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1582  Alignment, AddressSpace,
1583  UseMaskForCond, UseMaskForGaps);
1584  }
1585  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1586  bool IsPairwiseForm) override {
1587  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1588  }
1589  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1590  bool IsPairwiseForm, bool IsUnsigned) override {
1591  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1592  }
1593  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1594  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1595  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1596  ScalarizationCostPassed);
1597  }
1598  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1599  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1600  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1601  }
1602  int getCallInstrCost(Function *F, Type *RetTy,
1603  ArrayRef<Type *> Tys) override {
1604  return Impl.getCallInstrCost(F, RetTy, Tys);
1605  }
1606  unsigned getNumberOfParts(Type *Tp) override {
1607  return Impl.getNumberOfParts(Tp);
1608  }
1609  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1610  const SCEV *Ptr) override {
1611  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1612  }
1613  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1614  return Impl.getCostOfKeepingLiveOverCall(Tys);
1615  }
1616  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1617  MemIntrinsicInfo &Info) override {
1618  return Impl.getTgtMemIntrinsic(Inst, Info);
1619  }
1620  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1621  return Impl.getAtomicMemIntrinsicMaxElementSize();
1622  }
1623  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1624  Type *ExpectedType) override {
1625  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1626  }
1627  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1628  unsigned SrcAlign,
1629  unsigned DestAlign) const override {
1630  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1631  }
1632  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1633  LLVMContext &Context,
1634  unsigned RemainingBytes,
1635  unsigned SrcAlign,
1636  unsigned DestAlign) const override {
1637  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1638  SrcAlign, DestAlign);
1639  }
1640  bool areInlineCompatible(const Function *Caller,
1641  const Function *Callee) const override {
1642  return Impl.areInlineCompatible(Caller, Callee);
1643  }
1644  bool areFunctionArgsABICompatible(
1645  const Function *Caller, const Function *Callee,
1646  SmallPtrSetImpl<Argument *> &Args) const override {
1647  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1648  }
1649  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1650  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1651  }
1652  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1653  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1654  }
1655  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1656  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1657  }
1658  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1659  return Impl.isLegalToVectorizeLoad(LI);
1660  }
1661  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1662  return Impl.isLegalToVectorizeStore(SI);
1663  }
1664  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1665  unsigned Alignment,
1666  unsigned AddrSpace) const override {
1667  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1668  AddrSpace);
1669  }
1670  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1671  unsigned Alignment,
1672  unsigned AddrSpace) const override {
1673  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1674  AddrSpace);
1675  }
1676  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1677  unsigned ChainSizeInBytes,
1678  VectorType *VecTy) const override {
1679  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1680  }
1681  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1682  unsigned ChainSizeInBytes,
1683  VectorType *VecTy) const override {
1684  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1685  }
1686  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1687  ReductionFlags Flags) const override {
1688  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1689  }
1690  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1691  return Impl.shouldExpandReduction(II);
1692  }
1693  int getInstructionLatency(const Instruction *I) override {
1694  return Impl.getInstructionLatency(I);
1695  }
1696 };
1697 
1698 template <typename T>
1699 TargetTransformInfo::TargetTransformInfo(T Impl)
1700  : TTIImpl(new Model<T>(Impl)) {}
1701 
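The constructor above completes the type-erasure pattern: any object exposing the implementation API is moved into a Model<T>, and every later query virtual-dispatches through Concept to it. The sketch below only exercises the baseline path documented for the DataLayout constructor; makeBaselineTTI is an illustrative helper name, not an LLVM API.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper: builds the conservative, target-agnostic TTI that the
// DataLayout constructor provides. Real targets hand their own implementation
// object to the templated constructor instead; it is wrapped by Model<T> above.
static TargetTransformInfo makeBaselineTTI(const Module &M) {
  return TargetTransformInfo(M.getDataLayout());
}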
1702 /// Analysis pass providing the \c TargetTransformInfo.
1703 ///
1704 /// The core idea of the TargetIRAnalysis is to expose an interface through
1705 /// which LLVM targets can analyze and provide information about the middle
1706 /// end's target-independent IR. This supports use cases such as target-aware
1707 /// cost modeling of IR constructs.
1708 ///
1709 /// This is a function analysis because much of the cost modeling for targets
1710 /// is done in a subtarget specific way and LLVM supports compiling different
1711 /// functions targeting different subtargets in order to support runtime
1712 /// dispatch according to the observed subtarget.
1713 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1714 public:
1715  typedef TargetTransformInfo Result;
1716
1717  /// Default construct a target IR analysis.
1718  ///
1719  /// This will use the module's datalayout to construct a baseline
1720  /// conservative TTI result.
1721  TargetIRAnalysis();
1722 
1723  /// Construct an IR analysis pass around a target-provided callback.
1724  ///
1725  /// The callback will be called with a particular function for which the TTI
1726  /// is needed and must return a TTI object for that function.
1727  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1728 
1729  // Value semantics. We spell out the constructors for MSVC.
1730  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1731  : TTICallback(Arg.TTICallback) {}
1732  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1733  : TTICallback(std::move(Arg.TTICallback)) {}
1734  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1735  TTICallback = RHS.TTICallback;
1736  return *this;
1737  }
1738  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1739  TTICallback = std::move(RHS.TTICallback);
1740  return *this;
1741  }
1742 
1743  Result run(const Function &F, FunctionAnalysisManager &);
1744 
1745 private:
1746  friend AnalysisInfoMixin<TargetIRAnalysis>;
1747  static AnalysisKey Key;
1748 
1749  /// The callback used to produce a result.
1750  ///
1751  /// We use a completely opaque callback so that targets can provide whatever
1752  /// mechanism they desire for constructing the TTI for a given function.
1753  ///
1754  /// FIXME: Should we really use std::function? It's relatively inefficient.
1755  /// It might be possible to arrange for even stateful callbacks to outlive
1756  /// the analysis and thus use a function_ref which would be lighter weight.
1757  /// This may also be less error prone as the callback is likely to reference
1758  /// the external TargetMachine, and that reference needs to never dangle.
1759  std::function<Result(const Function &)> TTICallback;
1760 
1761  /// Helper function used as the callback in the default constructor.
1762  static Result getDefaultTTI(const Function &F);
1763 };
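In the new pass manager a transformation never builds a TargetTransformInfo itself; it requests the TargetIRAnalysis result from the analysis manager, which runs the registered callback (or the conservative default) for the current function. A sketch is below; PrintVectorWidthPass is a hypothetical example pass, not something provided by LLVM.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical new-PM pass that consumes the per-function TTI.
struct PrintVectorWidthPass : PassInfoMixin<PrintVectorWidthPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    // TargetIRAnalysis::run invokes TTICallback to produce a TTI for F.
    TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
    errs() << F.getName() << ": vector register width = "
           << TTI.getRegisterBitWidth(/*Vector=*/true) << "\n";
    return PreservedAnalyses::all();
  }
};

When a TargetMachine is available, tools typically register the analysis with a target-aware callback along the lines of FAM.registerPass([&] { return TM.getTargetIRAnalysis(); }); a default-constructed TargetIRAnalysis falls back to the conservative baseline result.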
1764 
1765 /// Wrapper pass for TargetTransformInfo.
1766 ///
1767 /// This pass can be constructed from a TTI object which it stores internally
1768 /// and is queried by passes.
1769 class TargetTransformInfoWrapperPass : public ImmutablePass {
1770  TargetIRAnalysis TIRA;
1771  Optional<TargetTransformInfo> TTI;
1772
1773  virtual void anchor();
1774 
1775 public:
1776  static char ID;
1777 
1778  /// We must provide a default constructor for the pass but it should
1779  /// never be used.
1780  ///
1781  /// Use the constructor below or call one of the creation routines.
1782  TargetTransformInfoWrapperPass();
1783
1784  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1785
1786  TargetTransformInfo &getTTI(const Function &F);
1787 };
1788 
1789 /// Create an analysis pass wrapper around a TTI object.
1790 ///
1791 /// This analysis pass just holds the TTI instance and makes it available to
1792 /// clients.
1793 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1794
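Under the legacy pass manager the same information reaches passes through TargetTransformInfoWrapperPass: the driver adds the wrapper (usually created from TargetMachine::getTargetIRAnalysis() via createTargetTransformInfoWrapperPass), and a pass declares the dependency in getAnalysisUsage and calls getTTI for the function it is processing. The sketch below is illustrative only; LegacyVectorWidthPass is not an LLVM pass.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
// Hypothetical legacy function pass that consumes TTI.
struct LegacyVectorWidthPass : FunctionPass {
  static char ID;
  LegacyVectorWidthPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    // The wrapper lazily builds a per-function TTI from its TargetIRAnalysis.
    TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    errs() << F.getName() << ": max interleave factor = "
           << TTI.getMaxInterleaveFactor(/*VF=*/2) << "\n";
    return false;
  }
};
} // namespace

char LegacyVectorWidthPass::ID = 0;

A driver would add the wrapper with something like PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())), or with a default-constructed TargetIRAnalysis when no TargetMachine is available.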
1795 } // End llvm namespace
1796 
1797 #endif