//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface, which provides the needed
///    information.
/// 3. The codegen-level implementation, which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
enum ID : unsigned;
}

class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class Loop;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class Type;
class User;
class Value;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};
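
// Illustrative sketch (not part of the original header): a pass holding a
// TargetTransformInfo reference `TTI` can have this struct filled in for a
// candidate IntrinsicInst `II` (both names are placeholders) by calling
// getTgtMemIntrinsic(), which is declared further down in this file:
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.isUnordered() &&
//       !Info.WriteMem)
//     ; // II can be treated like an ordinary load of Info.PtrVal.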

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI object wrapping their
  /// target-specific implementation that encodes appropriate costs for their
  /// target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of the instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note that this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }
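
  // Illustrative usage sketch (assumes a TargetTransformInfo reference `TTI`
  // and an instruction `I`, both placeholder names): the same instruction can
  // be priced under different cost models depending on what the client
  // optimizes for.
  //
  //   int ThroughputCost =
  //       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
  //   int SizeCost =
  //       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);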

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs, which
  /// works better with simple integral values. Thus this enum only provides
  /// constants. Also note that the returned costs are signed integers to make
  /// it natural to add, subtract, and test with zero (a common boundary
  /// condition). It is not expected that 2^32 is a realistic cost to be
  /// modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of
  /// code-size cost and execution cost. A free instruction is typically one
  /// that folds into another instruction. For example, reg-to-reg moves can
  /// often be skipped by renaming the registers in the CPU, but they still are
  /// encoded and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
  /// table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost APIs above, and one significant disadvantage: it can only
  /// be used when the IR construct has already been formed.
  ///
  /// The advantage is that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be the result of
  /// transformations of the current operands. The number of operands on the
  /// list must equal the number of current operands the IR user has, and
  /// their order on the list must match the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }
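
  // Illustrative usage sketch (placeholder names `TTI` and `GEP`): a client
  // can check whether an already-formed user, e.g. a GEP, is expected to fold
  // away when lowered.
  //
  //   if (TTI.getUserCost(GEP) == TargetTransformInfo::TCC_Free)
  //     ; // The GEP is expected to be folded into its users.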

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to
  /// conditional branches.
  bool hasBranchDivergence() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for the
  /// target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  // Returns true for the target-specific set of operations which produce a
  // uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can be
  /// converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions, when run through that interface,
    /// should not exceed this cost. However, this is only an estimate. Also,
    /// specific loops may be unrolled even with a cost above this threshold if
    /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by a factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current
    /// cost threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to
    /// fall back to Partial unrolling if full unrolling is above
    /// FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents the number of instructions optimized away when the "back
    // edge" becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when
    /// computing the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations for loops with a low dynamic
    /// trip count.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for the inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
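
  // Illustrative usage sketch (placeholder names `TTI`, `L`, and `SE`): a loop
  // pass seeds the preferences with its own defaults and then lets the target
  // adjust them.
  //
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   // ... fill UP with target-independent defaults ...
  //   TTI.getUnrollingPreferences(L, SE, UP);
  //   // UP.Threshold, UP.Partial, UP.Runtime, etc. now reflect the target.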

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
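
  // Illustrative usage sketch (placeholder names `TTI` and `Int32Ty`): check
  // whether a [reg + 4*reg + 16] style address is legal for an i32 access.
  //
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);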

  /// Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// Return true if LSR should make efforts to generate indexed addressing
  /// modes that operate across loop iterations.
  bool shouldFavorBackedgeIndex(const Loop *L) const;

  /// Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from a division are free.
  /// This can enable more aggressive transformations for division and
  /// remainder than would typically be allowed using throughput or size cost
  /// models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// an addrspacecast to the generic AS for volatile loads/stores. The default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. E.g. on x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;
    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
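
  // Illustrative usage sketch (placeholder name `TTI`): a nullptr result means
  // the target does not want memcmp expanded inline.
  //
  //   if (const auto *Options = TTI.enableMemCmpExpansion(/*IsZeroCmp=*/true))
  //     ; // Options->LoadSizes lists the usable load widths, largest first.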

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while, depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// Return the expected cost for the given integer when optimising
  /// for size. This is different from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of a single source vector with
                         ///< any shuffle mask.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is a uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of the given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  unsigned getMinimumVF(unsigned ElemWidth) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set to true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels.
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely
    // between microarchitectures. Also, we currently do not have a use for
    // L3 cache size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
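
  // Illustrative usage sketch (placeholder name `TTI`): the cache queries
  // return llvm::Optional values, so "no model available" is distinguishable
  // from a size of zero.
  //
  //   if (auto L1Size = TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D)) {
  //     unsigned Bytes = *L1Size; // L1 data cache size in bytes.
  //   }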

  /// \return How far ahead of a load we should place the prefetch
  /// instruction. This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride. This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead. If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  static OperandValueKind getOperandInfo(Value *V,
                                         OperandValueProperties &OpProps);

  /// This is an approximation of the reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operand
  /// values so the TTI can analyze those values, searching for special
  /// cases or optimizations based on those values.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;

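  // Illustrative usage sketch (placeholder names `TTI` and `VecTy`): cost of a
  // vector add where the second operand is known to be a uniform constant.
  //
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Add, VecTy, TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue);
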
  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds to show the insert/extract point and the type of
  /// the subvector being inserted/extracted.
  /// NOTE: For subvector extractions Tp represents the source type.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
                         const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace,
                      const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - the kind of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of a single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as an interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is the address space of the pointer.
  /// \p UseMaskForCond indicates if the memory access is predicated.
  /// \p UseMaskForGaps indicates if gaps should be masked.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false) const;
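
  // Illustrative usage sketch (placeholder names `TTI` and `WideVecTy`): cost
  // of an interleaved load group with factor 2 where both members are used.
  //
  //   unsigned Indices[] = {0, 1};
  //   int Cost = TTI.getInterleavedMemoryOpCost(Instruction::Load, WideVecTy,
  //                                             /*Factor=*/2, Indices,
  //                                             /*Alignment=*/4,
  //                                             /*AddressSpace=*/0);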

  /// Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) const;
  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  /// Three cases are handled: 1. scalar instruction 2. vector instruction
  /// 3. scalar instruction which is to be vectorized with VF.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1) const;

  /// \returns The cost of Intrinsic instructions. Types analysis only.
  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
  /// arguments and the return value will be computed based on types.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX) const;

  /// \returns The cost of Call instructions.
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can
  /// be merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the scalar evolution object, which
  /// is used in order to get the Ptr step value in case of constant stride.
  /// The 'Ptr' parameter holds the SCEV of the access pointer.
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
                                const SCEV *Ptr = nullptr) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may write
  /// or read to memory, volatility and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  /// \returns The maximum element size, in bytes, for an element
  /// unordered-atomic memory intrinsic.
  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given
  /// intrinsic memory operation. Returns nullptr if the target cannot create
  /// a result from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns The type to use in a loop expansion of a memcpy call.
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  /// \param RemainingBytes The number of bytes to copy.
  ///
  /// Calculates the operand types to use when copying \p RemainingBytes of
  /// memory, where the source and destination alignments are \p SrcAlign and
  /// \p DestAlign respectively.
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;

  /// \returns True if the two functions have compatible attributes for
  /// inlining purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \returns True if the caller and callee agree on how \p Args will be
  /// passed to the callee.
  /// \param[out] Args The list of compatible arguments. The implementation may
  /// filter out any incompatible args from this list.
  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const;

  /// The type of load/store indexing.
  enum MemIndexedMode {
    MIM_Unindexed, ///< No indexing.
    MIM_PreInc,    ///< Pre-incrementing.
    MIM_PreDec,    ///< Pre-decrementing.
    MIM_PostInc,   ///< Post-incrementing.
    MIM_PostDec    ///< Post-decrementing.
  };

  /// \returns True if the specified indexed load for the given type is legal.
  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns True if the specified indexed store for the given type is legal.
  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \returns True if the load instruction is legal to vectorize.
  bool isLegalToVectorizeLoad(LoadInst *LI) const;

  /// \returns True if the store instruction is legal to vectorize.
  bool isLegalToVectorizeStore(StoreInst *SI) const;

  /// \returns True if it is legal to vectorize the given load chain.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given store chain.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes loads or has a better vector factor.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes stores or has a better vector factor.
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;

  /// Flags describing the kind of vector reduction.
  struct ReductionFlags {
    ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
    bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max
                   ///< operation.
    bool IsSigned; ///< Whether the operation is a signed int reduction.
    bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
  };

  /// \returns True if the target wants to handle the given reduction idiom in
  /// the intrinsics form instead of the shuffle form.
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const;

  /// \returns True if the target wants to expand the given reduction intrinsic
  /// into a shuffle sequence.
  bool shouldExpandReduction(const IntrinsicInst *II) const;
  /// @}

private:
  /// Estimate the latency of the specified instruction.
  /// Returns 1 as the default value.
  int getInstructionLatency(const Instruction *I) const;

  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  int getInstructionThroughput(const Instruction *I) const;

  /// The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};
1032 
1034 public:
1035  virtual ~Concept() = 0;
1036  virtual const DataLayout &getDataLayout() const = 0;
1037  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
1038  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1039  ArrayRef<const Value *> Operands) = 0;
1040  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
1041  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
1042  virtual int getCallCost(const Function *F, int NumArgs) = 0;
1043  virtual int getCallCost(const Function *F,
1045  virtual unsigned getInliningThresholdMultiplier() = 0;
1046  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1047  ArrayRef<Type *> ParamTys) = 0;
1048  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1049  ArrayRef<const Value *> Arguments) = 0;
1050  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1051  unsigned &JTSize) = 0;
1052  virtual int
1053  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1054  virtual bool hasBranchDivergence() = 0;
1055  virtual bool isSourceOfDivergence(const Value *V) = 0;
1056  virtual bool isAlwaysUniform(const Value *V) = 0;
1057  virtual unsigned getFlatAddressSpace() = 0;
1058  virtual bool isLoweredToCall(const Function *F) = 0;
1059  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1060  UnrollingPreferences &UP) = 0;
1061  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1062  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1063  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1064  int64_t BaseOffset, bool HasBaseReg,
1065  int64_t Scale,
1066  unsigned AddrSpace,
1067  Instruction *I) = 0;
1068  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1070  virtual bool canMacroFuseCmp() = 0;
1071  virtual bool shouldFavorPostInc() const = 0;
1072  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
1073  virtual bool isLegalMaskedStore(Type *DataType) = 0;
1074  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1075  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1076  virtual bool isLegalMaskedGather(Type *DataType) = 0;
1077  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1078  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1079  virtual bool prefersVectorizedAddressing() = 0;
1080  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1081  int64_t BaseOffset, bool HasBaseReg,
1082  int64_t Scale, unsigned AddrSpace) = 0;
1083  virtual bool LSRWithInstrQueries() = 0;
1084  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1085  virtual bool isProfitableToHoist(Instruction *I) = 0;
1086  virtual bool useAA() = 0;
1087  virtual bool isTypeLegal(Type *Ty) = 0;
1088  virtual unsigned getJumpBufAlignment() = 0;
1089  virtual unsigned getJumpBufSize() = 0;
1090  virtual bool shouldBuildLookupTables() = 0;
1091  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1092  virtual bool useColdCCForColdCall(Function &F) = 0;
1093  virtual unsigned
1094  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1095  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1096  unsigned VF) = 0;
1097  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1098  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1099  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1100  bool IsZeroCmp) const = 0;
1101  virtual bool enableInterleavedAccessVectorization() = 0;
1102  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1103  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1104  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1105  unsigned BitWidth,
1106  unsigned AddressSpace,
1107  unsigned Alignment,
1108  bool *Fast) = 0;
1109  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1110  virtual bool haveFastSqrt(Type *Ty) = 0;
1111  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1112  virtual int getFPOpCost(Type *Ty) = 0;
1113  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1114  Type *Ty) = 0;
1115  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1116  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1117  Type *Ty) = 0;
1118  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1119  Type *Ty) = 0;
1120  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1121  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1122  virtual unsigned getMinVectorRegisterBitWidth() = 0;
1123  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1124  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1125  virtual bool shouldConsiderAddressTypePromotion(
1126  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1127  virtual unsigned getCacheLineSize() = 0;
1128  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1129  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1130  virtual unsigned getPrefetchDistance() = 0;
1131  virtual unsigned getMinPrefetchStride() = 0;
1132  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1133  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1134  virtual unsigned
1135  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1136  OperandValueKind Opd2Info,
1137  OperandValueProperties Opd1PropInfo,
1138  OperandValueProperties Opd2PropInfo,
1139  ArrayRef<const Value *> Args) = 0;
1140  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1141  Type *SubTp) = 0;
1142  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1143  const Instruction *I) = 0;
1144  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1145  VectorType *VecTy, unsigned Index) = 0;
1146  virtual int getCFInstrCost(unsigned Opcode) = 0;
1147  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1148  Type *CondTy, const Instruction *I) = 0;
1149  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1150  unsigned Index) = 0;
1151  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1152  unsigned AddressSpace, const Instruction *I) = 0;
1153  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1154  unsigned Alignment,
1155  unsigned AddressSpace) = 0;
1156  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1157  Value *Ptr, bool VariableMask,
1158  unsigned Alignment) = 0;
1159  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1160  unsigned Factor,
1161  ArrayRef<unsigned> Indices,
1162  unsigned Alignment,
1163  unsigned AddressSpace,
1164  bool UseMaskForCond = false,
1165  bool UseMaskForGaps = false) = 0;
1166  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1167  bool IsPairwiseForm) = 0;
1168  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1169  bool IsPairwiseForm, bool IsUnsigned) = 0;
1170  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1172  unsigned ScalarizationCostPassed) = 0;
1173  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1174  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1175  virtual int getCallInstrCost(Function *F, Type *RetTy,
1176  ArrayRef<Type *> Tys) = 0;
1177  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1178  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1179  const SCEV *Ptr) = 0;
1180  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1181  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1182  MemIntrinsicInfo &Info) = 0;
1183  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1184  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1185  Type *ExpectedType) = 0;
1186  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1187  unsigned SrcAlign,
1188  unsigned DestAlign) const = 0;
1189  virtual void getMemcpyLoopResidualLoweringType(
1190  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1191  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1192  virtual bool areInlineCompatible(const Function *Caller,
1193  const Function *Callee) const = 0;
1194  virtual bool
1195  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1196  SmallPtrSetImpl<Argument *> &Args) const = 0;
1197  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1198  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1199  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1200  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1201  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1202  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1203  unsigned Alignment,
1204  unsigned AddrSpace) const = 0;
1205  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1206  unsigned Alignment,
1207  unsigned AddrSpace) const = 0;
1208  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1209  unsigned ChainSizeInBytes,
1210  VectorType *VecTy) const = 0;
1211  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1212  unsigned ChainSizeInBytes,
1213  VectorType *VecTy) const = 0;
1214  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1215  ReductionFlags) const = 0;
1216  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1217  virtual int getInstructionLatency(const Instruction *I) = 0;
1218 };
1219 
1220 template <typename T>
1221 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1222  T Impl;
1223 
1224 public:
1225  Model(T Impl) : Impl(std::move(Impl)) {}
1226  ~Model() override {}
1227 
1228  const DataLayout &getDataLayout() const override {
1229  return Impl.getDataLayout();
1230  }
1231 
1232  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1233  return Impl.getOperationCost(Opcode, Ty, OpTy);
1234  }
1235  int getGEPCost(Type *PointeeType, const Value *Ptr,
1236  ArrayRef<const Value *> Operands) override {
1237  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1238  }
1239  int getExtCost(const Instruction *I, const Value *Src) override {
1240  return Impl.getExtCost(I, Src);
1241  }
1242  int getCallCost(FunctionType *FTy, int NumArgs) override {
1243  return Impl.getCallCost(FTy, NumArgs);
1244  }
1245  int getCallCost(const Function *F, int NumArgs) override {
1246  return Impl.getCallCost(F, NumArgs);
1247  }
1248  int getCallCost(const Function *F,
1249  ArrayRef<const Value *> Arguments) override {
1250  return Impl.getCallCost(F, Arguments);
1251  }
1252  unsigned getInliningThresholdMultiplier() override {
1253  return Impl.getInliningThresholdMultiplier();
1254  }
1255  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1256  ArrayRef<Type *> ParamTys) override {
1257  return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
1258  }
1259  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1260  ArrayRef<const Value *> Arguments) override {
1261  return Impl.getIntrinsicCost(IID, RetTy, Arguments);
1262  }
1263  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1264  return Impl.getUserCost(U, Operands);
1265  }
1266  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1267  bool isSourceOfDivergence(const Value *V) override {
1268  return Impl.isSourceOfDivergence(V);
1269  }
1270 
1271  bool isAlwaysUniform(const Value *V) override {
1272  return Impl.isAlwaysUniform(V);
1273  }
1274 
1275  unsigned getFlatAddressSpace() override {
1276  return Impl.getFlatAddressSpace();
1277  }
1278 
1279  bool isLoweredToCall(const Function *F) override {
1280  return Impl.isLoweredToCall(F);
1281  }
1282  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1283  UnrollingPreferences &UP) override {
1284  return Impl.getUnrollingPreferences(L, SE, UP);
1285  }
1286  bool isLegalAddImmediate(int64_t Imm) override {
1287  return Impl.isLegalAddImmediate(Imm);
1288  }
1289  bool isLegalICmpImmediate(int64_t Imm) override {
1290  return Impl.isLegalICmpImmediate(Imm);
1291  }
1292  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1293  bool HasBaseReg, int64_t Scale,
1294  unsigned AddrSpace,
1295  Instruction *I) override {
1296  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1297  Scale, AddrSpace, I);
1298  }
1299  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1300  TargetTransformInfo::LSRCost &C2) override {
1301  return Impl.isLSRCostLess(C1, C2);
1302  }
1303  bool canMacroFuseCmp() override {
1304  return Impl.canMacroFuseCmp();
1305  }
1306  bool shouldFavorPostInc() const override {
1307  return Impl.shouldFavorPostInc();
1308  }
1309  bool shouldFavorBackedgeIndex(const Loop *L) const override {
1310  return Impl.shouldFavorBackedgeIndex(L);
1311  }
1312  bool isLegalMaskedStore(Type *DataType) override {
1313  return Impl.isLegalMaskedStore(DataType);
1314  }
1315  bool isLegalMaskedLoad(Type *DataType) override {
1316  return Impl.isLegalMaskedLoad(DataType);
1317  }
1318  bool isLegalMaskedScatter(Type *DataType) override {
1319  return Impl.isLegalMaskedScatter(DataType);
1320  }
1321  bool isLegalMaskedGather(Type *DataType) override {
1322  return Impl.isLegalMaskedGather(DataType);
1323  }
1324  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1325  return Impl.hasDivRemOp(DataType, IsSigned);
1326  }
1327  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1328  return Impl.hasVolatileVariant(I, AddrSpace);
1329  }
1330  bool prefersVectorizedAddressing() override {
1331  return Impl.prefersVectorizedAddressing();
1332  }
1333  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1334  bool HasBaseReg, int64_t Scale,
1335  unsigned AddrSpace) override {
1336  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1337  Scale, AddrSpace);
1338  }
1339  bool LSRWithInstrQueries() override {
1340  return Impl.LSRWithInstrQueries();
1341  }
1342  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1343  return Impl.isTruncateFree(Ty1, Ty2);
1344  }
1345  bool isProfitableToHoist(Instruction *I) override {
1346  return Impl.isProfitableToHoist(I);
1347  }
1348  bool useAA() override { return Impl.useAA(); }
1349  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1350  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1351  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1352  bool shouldBuildLookupTables() override {
1353  return Impl.shouldBuildLookupTables();
1354  }
1355  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1356  return Impl.shouldBuildLookupTablesForConstant(C);
1357  }
1358  bool useColdCCForColdCall(Function &F) override {
1359  return Impl.useColdCCForColdCall(F);
1360  }
1361 
1362  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1363  bool Extract) override {
1364  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1365  }
1366  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1367  unsigned VF) override {
1368  return Impl.getOperandsScalarizationOverhead(Args, VF);
1369  }
1370 
1371  bool supportsEfficientVectorElementLoadStore() override {
1372  return Impl.supportsEfficientVectorElementLoadStore();
1373  }
1374 
1375  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1376  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1377  }
1378  const MemCmpExpansionOptions *enableMemCmpExpansion(
1379  bool IsZeroCmp) const override {
1380  return Impl.enableMemCmpExpansion(IsZeroCmp);
1381  }
1382  bool enableInterleavedAccessVectorization() override {
1383  return Impl.enableInterleavedAccessVectorization();
1384  }
1385  bool enableMaskedInterleavedAccessVectorization() override {
1386  return Impl.enableMaskedInterleavedAccessVectorization();
1387  }
1388  bool isFPVectorizationPotentiallyUnsafe() override {
1389  return Impl.isFPVectorizationPotentiallyUnsafe();
1390  }
1391  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1392  unsigned BitWidth, unsigned AddressSpace,
1393  unsigned Alignment, bool *Fast) override {
1394  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1395  Alignment, Fast);
1396  }
1397  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1398  return Impl.getPopcntSupport(IntTyWidthInBit);
1399  }
1400  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1401 
1402  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1403  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1404  }
1405 
1406  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1407 
1408  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1409  Type *Ty) override {
1410  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1411  }
1412  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1413  return Impl.getIntImmCost(Imm, Ty);
1414  }
1415  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1416  Type *Ty) override {
1417  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1418  }
1419  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1420  Type *Ty) override {
1421  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1422  }
1423  unsigned getNumberOfRegisters(bool Vector) override {
1424  return Impl.getNumberOfRegisters(Vector);
1425  }
1426  unsigned getRegisterBitWidth(bool Vector) const override {
1427  return Impl.getRegisterBitWidth(Vector);
1428  }
1429  unsigned getMinVectorRegisterBitWidth() override {
1430  return Impl.getMinVectorRegisterBitWidth();
1431  }
1432  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1433  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1434  }
1435  unsigned getMinimumVF(unsigned ElemWidth) const override {
1436  return Impl.getMinimumVF(ElemWidth);
1437  }
1438  bool shouldConsiderAddressTypePromotion(
1439  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1440  return Impl.shouldConsiderAddressTypePromotion(
1441  I, AllowPromotionWithoutCommonHeader);
1442  }
1443  unsigned getCacheLineSize() override {
1444  return Impl.getCacheLineSize();
1445  }
1446  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1447  return Impl.getCacheSize(Level);
1448  }
1449  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1450  return Impl.getCacheAssociativity(Level);
1451  }
1452  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1453  unsigned getMinPrefetchStride() override {
1454  return Impl.getMinPrefetchStride();
1455  }
1456  unsigned getMaxPrefetchIterationsAhead() override {
1457  return Impl.getMaxPrefetchIterationsAhead();
1458  }
1459  unsigned getMaxInterleaveFactor(unsigned VF) override {
1460  return Impl.getMaxInterleaveFactor(VF);
1461  }
1462  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1463  unsigned &JTSize) override {
1464  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1465  }
1466  unsigned
1467  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1468  OperandValueKind Opd2Info,
1469  OperandValueProperties Opd1PropInfo,
1470  OperandValueProperties Opd2PropInfo,
1471  ArrayRef<const Value *> Args) override {
1472  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1473  Opd1PropInfo, Opd2PropInfo, Args);
1474  }
1475  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1476  Type *SubTp) override {
1477  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1478  }
1479  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1480  const Instruction *I) override {
1481  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1482  }
1483  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1484  unsigned Index) override {
1485  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1486  }
1487  int getCFInstrCost(unsigned Opcode) override {
1488  return Impl.getCFInstrCost(Opcode);
1489  }
1490  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1491  const Instruction *I) override {
1492  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1493  }
1494  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1495  return Impl.getVectorInstrCost(Opcode, Val, Index);
1496  }
1497  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1498  unsigned AddressSpace, const Instruction *I) override {
1499  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1500  }
1501  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1502  unsigned AddressSpace) override {
1503  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1504  }
1505  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1506  Value *Ptr, bool VariableMask,
1507  unsigned Alignment) override {
1508  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1509  Alignment);
1510  }
1511  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1512  ArrayRef<unsigned> Indices, unsigned Alignment,
1513  unsigned AddressSpace, bool UseMaskForCond,
1514  bool UseMaskForGaps) override {
1515  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1516  Alignment, AddressSpace,
1517  UseMaskForCond, UseMaskForGaps);
1518  }
1519  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1520  bool IsPairwiseForm) override {
1521  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1522  }
1523  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1524  bool IsPairwiseForm, bool IsUnsigned) override {
1525  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1526  }
1527  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1528  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1529  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1530  ScalarizationCostPassed);
1531  }
1532  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1533  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1534  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1535  }
1536  int getCallInstrCost(Function *F, Type *RetTy,
1537  ArrayRef<Type *> Tys) override {
1538  return Impl.getCallInstrCost(F, RetTy, Tys);
1539  }
1540  unsigned getNumberOfParts(Type *Tp) override {
1541  return Impl.getNumberOfParts(Tp);
1542  }
1543  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1544  const SCEV *Ptr) override {
1545  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1546  }
1547  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1548  return Impl.getCostOfKeepingLiveOverCall(Tys);
1549  }
1550  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1551  MemIntrinsicInfo &Info) override {
1552  return Impl.getTgtMemIntrinsic(Inst, Info);
1553  }
1554  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1555  return Impl.getAtomicMemIntrinsicMaxElementSize();
1556  }
1557  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1558  Type *ExpectedType) override {
1559  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1560  }
1561  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1562  unsigned SrcAlign,
1563  unsigned DestAlign) const override {
1564  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1565  }
1566  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1567  LLVMContext &Context,
1568  unsigned RemainingBytes,
1569  unsigned SrcAlign,
1570  unsigned DestAlign) const override {
1571  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1572  SrcAlign, DestAlign);
1573  }
1574  bool areInlineCompatible(const Function *Caller,
1575  const Function *Callee) const override {
1576  return Impl.areInlineCompatible(Caller, Callee);
1577  }
1578  bool areFunctionArgsABICompatible(
1579  const Function *Caller, const Function *Callee,
1580  SmallPtrSetImpl<Argument *> &Args) const override {
1581  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1582  }
1583  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1584  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1585  }
1586  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1587  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1588  }
1589  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1590  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1591  }
1592  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1593  return Impl.isLegalToVectorizeLoad(LI);
1594  }
1595  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1596  return Impl.isLegalToVectorizeStore(SI);
1597  }
1598  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1599  unsigned Alignment,
1600  unsigned AddrSpace) const override {
1601  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1602  AddrSpace);
1603  }
1604  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1605  unsigned Alignment,
1606  unsigned AddrSpace) const override {
1607  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1608  AddrSpace);
1609  }
1610  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1611  unsigned ChainSizeInBytes,
1612  VectorType *VecTy) const override {
1613  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1614  }
1615  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1616  unsigned ChainSizeInBytes,
1617  VectorType *VecTy) const override {
1618  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1619  }
1620  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1621  ReductionFlags Flags) const override {
1622  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1623  }
1624  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1625  return Impl.shouldExpandReduction(II);
1626  }
1627  int getInstructionLatency(const Instruction *I) override {
1628  return Impl.getInstructionLatency(I);
1629  }
1630 };
1631 
1632 template <typename T>
1633 TargetTransformInfo::TargetTransformInfo(T Impl)
1634  : TTIImpl(new Model<T>(Impl)) {}
1635 
1636 /// Analysis pass providing the \c TargetTransformInfo.
1637 ///
1638 /// The core idea of the TargetIRAnalysis is to expose an interface through
1639 /// which LLVM targets can analyze and provide information about the middle
1640 /// end's target-independent IR. This supports use cases such as target-aware
1641 /// cost modeling of IR constructs.
1642 ///
1643 /// This is a function analysis because much of the cost modeling for targets
1644 /// is done in a subtarget specific way and LLVM supports compiling different
1645 /// functions targeting different subtargets in order to support runtime
1646 /// dispatch according to the observed subtarget.
1647 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1648 public:
1649  typedef TargetTransformInfo Result;
1650 
1651  /// Default construct a target IR analysis.
1652  ///
1653  /// This will use the module's datalayout to construct a baseline
1654  /// conservative TTI result.
1655  TargetIRAnalysis();
1656 
1657  /// Construct an IR analysis pass around a target-provided callback.
1658  ///
1659  /// The callback will be called with a particular function for which the TTI
1660  /// is needed and must return a TTI object for that function.
1661  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1662 
1663  // Value semantics. We spell out the constructors for MSVC.
1664  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1665  : TTICallback(Arg.TTICallback) {}
1666  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1667  : TTICallback(std::move(Arg.TTICallback)) {}
1668  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1669  TTICallback = RHS.TTICallback;
1670  return *this;
1671  }
1672  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1673  TTICallback = std::move(RHS.TTICallback);
1674  return *this;
1675  }
1676 
1677  Result run(const Function &F, FunctionAnalysisManager &);
1678 
1679 private:
1680  friend AnalysisInfoMixin<TargetIRAnalysis>;
1681  static AnalysisKey Key;
1682 
1683  /// The callback used to produce a result.
1684  ///
1685  /// We use a completely opaque callback so that targets can provide whatever
1686  /// mechanism they desire for constructing the TTI for a given function.
1687  ///
1688  /// FIXME: Should we really use std::function? It's relatively inefficient.
1689  /// It might be possible to arrange for even stateful callbacks to outlive
1690  /// the analysis and thus use a function_ref which would be lighter weight.
1691  /// This may also be less error prone as the callback is likely to reference
1692  /// the external TargetMachine, and that reference needs to never dangle.
1693  std::function<Result(const Function &)> TTICallback;
1694 
1695  /// Helper function used as the callback in the default constructor.
1696  static Result getDefaultTTI(const Function &F);
1697 };
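
To make the analysis above concrete, the following is a minimal sketch (not part of this header) of how TargetIRAnalysis can be registered with a new-pass-manager FunctionAnalysisManager and queried for a function. The helper name queryVectorRegisterWidth and the standalone analysis manager are illustrative assumptions; real drivers normally install a target callback (typically obtained from the TargetMachine) rather than the default-constructed, DataLayout-only analysis used here.

// Illustrative sketch only -- not part of TargetTransformInfo.h.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

static unsigned queryVectorRegisterWidth(Function &F) {
  FunctionAnalysisManager FAM;
  // Default-constructed analysis: produces a conservative, DataLayout-only
  // TTI. A target would instead register a callback returning its own TTI.
  FAM.registerPass([] { return TargetIRAnalysis(); });
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  return TTI.getRegisterBitWidth(/*Vector=*/true);
}
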
1698 
1699 /// Wrapper pass for TargetTransformInfo.
1700 ///
1701 /// This pass can be constructed from a TTI object which it stores internally
1702 /// and is queried by passes.
1703 class TargetTransformInfoWrapperPass : public ImmutablePass {
1704  TargetIRAnalysis TIRA;
1705  Optional<TargetTransformInfo> TTI;
1706 
1707  virtual void anchor();
1708 
1709 public:
1710  static char ID;
1711 
1712  /// We must provide a default constructor for the pass but it should
1713  /// never be used.
1714  ///
1715  /// Use the constructor below or call one of the creation routines.
1716  TargetTransformInfoWrapperPass();
1717 
1718  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1719 
1720  TargetTransformInfo &getTTI(const Function &F);
1721 };
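
Under the legacy pass manager, the wrapper pass above is the access path to TTI. The following is a minimal sketch, assuming a hypothetical ExampleLegacyPass, of how a pass declares the dependency and pulls a per-function TTI out of the immutable wrapper; pass registration (RegisterPass/INITIALIZE_PASS) is omitted for brevity.

// Illustrative sketch only -- not part of TargetTransformInfo.h.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct ExampleLegacyPass : public FunctionPass {
  static char ID;
  ExampleLegacyPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The wrapper is an ImmutablePass, so requiring it is cheap and it is
    // never invalidated.
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    const TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    // Any of the query hooks above can now be used, e.g. register counts.
    (void)TTI.getNumberOfRegisters(/*Vector=*/true);
    return false;
  }
};
} // namespace

char ExampleLegacyPass::ID = 0;
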
1722 
1723 /// Create an analysis pass wrapper around a TTI object.
1724 ///
1725 /// This analysis pass just holds the TTI instance and makes it available to
1726 /// clients.
1727 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1728 
1729 } // End llvm namespace
1730 
1731 #endif
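
Finally, a sketch of how a driver might install the wrapper declared above into a legacy pipeline. This is illustrative rather than normative: buildPipeline is a made-up helper, and it assumes TM is a configured TargetMachine exposing its analysis via TargetMachine::getTargetIRAnalysis(); when no target machine is available, it falls back to the conservative, DataLayout-only analysis.

// Illustrative sketch only -- not part of TargetTransformInfo.h.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

void buildPipeline(legacy::PassManager &PM, TargetMachine *TM) {
  // Install per-function TTI for legacy passes; fall back to the baseline
  // analysis when no TargetMachine is available.
  PM.add(createTargetTransformInfoWrapperPass(
      TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()));
}
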