LLVM 4.0.0
TargetTransformInfo.h
//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
#include <functional>
namespace llvm {

class Function;
class GlobalValue;
class Loop;
class ScalarEvolution;
class SCEV;
class Type;
class User;
class Value;

/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  MemIntrinsicInfo()
      : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0),
        NumMemRefs(0), PtrVal(nullptr) {}
  bool ReadMem;
  bool WriteMem;
  /// True only if this memory operation is non-volatile, non-atomic, and
  /// unordered. (See LoadInst/StoreInst for details on each.)
  bool IsSimple;
  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId;
  int NumMemRefs;

  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal;
};

/// \brief This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// \brief Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// \brief Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// \brief Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// \brief Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs, which
  /// works better with simple integral values. Thus this enum only provides
  /// constants. Also note that the returned costs are signed integers to make
  /// it natural to add, subtract, and test with zero (a common boundary
  /// condition). It is not expected that 2^32 is a realistic cost to be
  /// modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of
  /// code-size cost and execution cost. A free instruction is typically one
  /// that folds into another instruction. For example, reg-to-reg moves can
  /// often be skipped by renaming the registers in the CPU, but they still
  /// are encoded and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// \brief Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface,
  /// as analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
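
  // Illustrative usage sketch (not part of the upstream header): a client
  // pass might compare an operation's lowered cost against the constants in
  // TargetCostConstants; `TTI` and `Int64Ty` are assumed to come from the
  // caller's context:
  //
  //   int Cost = TTI.getOperationCost(Instruction::Mul, Int64Ty);
  //   if (Cost <= TargetTransformInfo::TCC_Basic)
  //     ...; // Cheap enough to transform speculatively.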

  /// \brief Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// \brief Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \brief Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantage is that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U) const;
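
  // Illustrative sketch (not part of the upstream header): estimating the
  // total lowered cost of a basic block by summing per-user costs; `TTI` and
  // `BB` are assumed to come from the surrounding pass:
  //
  //   int BlockCost = 0;
  //   for (const Instruction &I : BB)
  //     BlockCost += TTI.getUserCost(&I);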

  /// \brief Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to
  /// conditional branches.
  bool hasBranchDivergence() const;

  /// \brief Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// \brief Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately, as a call is a single small instruction but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%,
    /// then we boost the threshold by a factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current
    /// cost threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to
    /// fall back to Partial unrolling if full unrolling is above
    /// FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents the number of instructions optimized away when the "back
    // edge" becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when
    /// computing the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations for loops with a low dynamic trip
    /// count.
    bool AllowPeeling;
  };

  /// \brief Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;

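  // Illustrative sketch (not part of the upstream header): a target's TTI
  // implementation might adjust the defaults it is handed; `MyTTIImpl` is a
  // hypothetical in-order core's implementation, not an LLVM class:
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L,
  //                                           UnrollingPreferences &UP) {
  //     UP.Partial = true;   // Allow partial unrolling of large loops.
  //     UP.Runtime = false;  // Avoid runtime trip-count checks.
  //     UP.MaxCount = 4;     // Cap the unroll factor.
  //   }
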
  /// @}

  /// \name Scalar Target Information
  /// @{

  /// \brief Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// \brief Return true if the specified immediate is a legal add immediate,
  /// that is, the target has add instructions which can add a register with
  /// the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// \brief Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// \brief Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0) const;

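  // Illustrative sketch (not part of the upstream header): checking whether
  // a [reg + 4*reg + 16] style mode is supported for an i32 access, as
  // LSR-style clients do; `TTI` and `Int32Ty` are assumed from context:
  //
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);
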
  /// \brief Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// \brief Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// \brief Return true if the target supports the load / store
  /// instruction with the given Offset of the form reg + Offset. It
  /// may be that Offset is too big for a certain type (register
  /// class).
  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const;

  /// \brief Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// \brief Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  /// \brief Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// \brief Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// \brief Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// \brief Return true if switches should be turned into lookup tables for
  /// the target.
  bool shouldBuildLookupTables() const;

  /// \brief Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// \brief Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// \brief Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// \brief Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because vector and scalar floating-point
  /// semantics may differ. For example, ARM NEON v7 SIMD math does not
  /// support IEEE-754 denormal numbers, while depending on the platform,
  /// scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// \brief Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;

  /// \brief Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// \brief Return the expected cost of supporting the floating point
  /// operation of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// \brief Return the expected cost for the given integer when optimising
  /// for size. This is different from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total costs for a constant are calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// \brief The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Alternate,        ///< Choose alternate elements from vector.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
  };

  /// \brief Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// \brief Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// \return How far ahead of a load we should place the prefetch
  /// instruction. This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride. This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead. If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases/optimizations based on those values.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

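  // Illustrative sketch (not part of the upstream header): vectorizers
  // typically query arithmetic and shuffle costs together when pricing a
  // candidate plan; `TTI` and `V4I32Ty` are assumed from context:
  //
  //   int MulCost = TTI.getArithmeticInstrCost(
  //       Instruction::Mul, V4I32Ty,
  //       TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue);
  //   int RevCost = TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
  //                                    V4I32Ty);
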
  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                         Type *CondTy = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - the kind of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of a single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) const;

  /// \brief Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;

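  // Worked illustration (not part of the upstream header) of the two
  // reduction forms above for an add reduction of the values (1, 2, 3, 4):
  //
  //   Pairwise: step 1 -> (1+2, 3+4, undef, undef) = (3, 7, -, -)
  //             step 2 -> (3+7)                    = 10
  //   Split:    step 1 -> (1+3, 2+4, undef, undef) = (4, 6, -, -)
  //             step 2 -> (4+6)                    = 10
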
  /// \returns The cost of Intrinsic instructions. Types analysis only.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF) const;

  /// \returns The cost of Call instructions.
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can
  /// be merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the scalar evolution object which
  /// is used in order to get the Ptr step value in case of constant stride.
  /// The 'Ptr' parameter holds the SCEV of the access pointer.
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
                                const SCEV *Ptr = nullptr) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may read
  /// from or write to memory, volatility, and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given
  /// intrinsic memory operation. Returns nullptr if the target cannot create
  /// a result from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

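  // Illustrative sketch (not part of the upstream header) of how a
  // redundancy-elimination client might use these two hooks together; `TTI`,
  // `II`, and `ExpectedTy` are assumed from the surrounding pass:
  //
  //   MemIntrinsicInfo Info;
  //   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && Info.PtrVal)
  //     if (Value *V = TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedTy))
  //       ...; // Reuse V in place of a redundant load.
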
  /// \returns True if the two functions have compatible attributes for
  /// inlining purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \returns The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \returns True if the load instruction is legal to vectorize.
  bool isLegalToVectorizeLoad(LoadInst *LI) const;

  /// \returns True if the store instruction is legal to vectorize.
  bool isLegalToVectorizeStore(StoreInst *SI) const;

  /// \returns True if it is legal to vectorize the given load chain.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given store chain.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes loads or has a better vector factor.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes stores or has a better vector factor.
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;

  /// @}

private:
  /// \brief The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// \brief The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};

class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
  virtual int getCallCost(const Function *F, int NumArgs) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments) = 0;
  virtual int getUserCost(const User *U) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace) = 0;
  virtual bool isLegalMaskedStore(Type *DataType) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual unsigned getJumpBufAlignment() = 0;
  virtual unsigned getJumpBufSize() = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) = 0;
  virtual unsigned getCacheLineSize() = 0;
  virtual unsigned getPrefetchDistance() = 0;
  virtual unsigned getMinPrefetchStride() = 0;
  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo,
                         ArrayRef<const Value *> Args) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                              unsigned AddressSpace) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment,
                                    unsigned AddressSpace) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     Value *Ptr, bool VariableMask,
                                     unsigned Alignment) = 0;
  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                         unsigned Factor,
                                         ArrayRef<unsigned> Indices,
                                         unsigned Alignment,
                                         unsigned AddressSpace) = 0;
  virtual int getReductionCost(unsigned Opcode, Type *Ty,
                               bool IsPairwiseForm) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Type *> Tys,
                                    FastMathFlags FMF) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Value *> Args,
                                    FastMathFlags FMF) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy,
                               ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                        const SCEV *Ptr) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           unsigned Alignment,
                                           unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            unsigned Alignment,
                                            unsigned AddrSpace) const = 0;
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const = 0;
  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getCallCost(FunctionType *FTy, int NumArgs) override {
    return Impl.getCallCost(FTy, NumArgs);
  }
  int getCallCost(const Function *F, int NumArgs) override {
    return Impl.getCallCost(F, NumArgs);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments) override {
    return Impl.getCallCost(F, Arguments);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
  }
  int getUserCost(const User *U) override { return Impl.getUserCost(U); }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, UP);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
  }
  bool isLegalMaskedStore(Type *DataType) override {
    return Impl.isLegalMaskedStore(DataType);
  }
  bool isLegalMaskedLoad(Type *DataType) override {
    return Impl.isLegalMaskedLoad(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType) override {
    return Impl.isLegalMaskedScatter(DataType);
  }
  bool isLegalMaskedGather(Type *DataType) override {
    return Impl.isLegalMaskedGather(DataType);
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
    return Impl.isFoldableMemAccessOffset(I, Offset);
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(IID, Idx, Imm, Ty);
  }
  unsigned getNumberOfRegisters(bool Vector) override {
    return Impl.getNumberOfRegisters(Vector);
  }
  unsigned getRegisterBitWidth(bool Vector) override {
    return Impl.getRegisterBitWidth(Vector);
  }

  unsigned getCacheLineSize() override {
    return Impl.getCacheLineSize();
  }
  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
  unsigned getMinPrefetchStride() override {
    return Impl.getMinPrefetchStride();
  }
  unsigned getMaxPrefetchIterationsAhead() override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo,
                         ArrayRef<const Value *> Args) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
  }
  int getReductionCost(unsigned Opcode, Type *Ty,
                       bool IsPairwiseForm) override {
    return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                            FastMathFlags FMF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args,
                            FastMathFlags FMF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

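// The three pieces above form a classic type-erasure idiom: \c Concept is the
// abstract interface, \c Model<T> adapts any concrete implementation to it,
// and the templated constructor stamps out a \c Model for whatever type a
// target supplies. A minimal standalone sketch of the same pattern (the names
// here are hypothetical, not LLVM APIs):
//
//   class AnyCost {
//     struct Concept { virtual ~Concept() {} virtual int cost() = 0; };
//     template <typename T> struct Model : Concept {
//       T Impl;
//       Model(T I) : Impl(std::move(I)) {}
//       int cost() override { return Impl.cost(); }
//     };
//     std::unique_ptr<Concept> Ptr;
//   public:
//     template <typename T> AnyCost(T I) : Ptr(new Model<T>(std::move(I))) {}
//     int cost() { return Ptr->cost(); }
//   };
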
/// \brief Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// \brief Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// \brief Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// \brief The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// \brief Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

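// Illustrative sketch (not part of the upstream header): with the new pass
// manager, a pass obtains TTI through this analysis; `F` and `FAM` are
// assumed to come from the pass's run method:
//
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//   int Cost = TTI.getUserCost(SomeInstruction);
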
/// \brief Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// \brief We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

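// Illustrative sketch (not part of the upstream header): legacy pass manager
// clients declare a dependency on the wrapper and then query it per function;
// `MyPass` is a hypothetical client pass:
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   ...
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
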
/// \brief Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // End llvm namespace

#endif
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type * > ParamTys) const
Estimate the cost of an intrinsic when lowered.
Reverse the order of the vector.
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
virtual int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands)=0
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
ExtractSubvector Index indicates start offset.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Type * > Tys, FastMathFlags FMF) const
virtual unsigned getJumpBufSize()=0
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
This is an important base class in LLVM.
Definition: Constant.h:42
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys) const
virtual unsigned getInliningThresholdMultiplier()=0
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:328
Expected to fold away in lowering.
TargetIRAnalysis()
Default construct a target IR analysis.
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const
Calculate the cost of performing a vector reduction.
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
uint32_t Offset
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
virtual bool isProfitableToHoist(Instruction *I)=0
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Merge elements from two source vectors into one with any shuffle mask.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const =0
virtual bool haveFastSqrt(Type *Ty)=0
virtual bool isLegalMaskedGather(Type *DataType)=0
virtual unsigned getNumberOfRegisters(bool Vector)=0
int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy=nullptr) const
Estimate the cost of a specific operation when lowered.
int getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type...
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index=0, Type *SubTp=nullptr) const
virtual bool isLegalMaskedStore(Type *DataType)=0
bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const
Return true if target supports the load / store instruction with the given Offset on the form reg + O...
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
OperandValueProperties
Additional properties of an operand's values.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)=0
ImmutablePass class - This class is used to provide information that does not need to be run...
Definition: Pass.h:266
virtual unsigned getMinPrefetchStride()=0
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
virtual unsigned getCacheLineSize()=0
unsigned getMinPrefetchStride() const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
virtual unsigned getMaxPrefetchIterationsAhead()=0
AddressSpace
Definition: NVPTXBaseInfo.h:22
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy)=0
virtual int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm)=0
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
Class to represent vector types.
Definition: DerivedTypes.h:369
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Class for arbitrary precision integers.
Definition: APInt.h:77
int getIntImmCost(const APInt &Imm, Type *Ty) const
Return the expected cost of materializing for the given integer immediate of the specified type...
virtual int getCFInstrCost(unsigned Opcode)=0
unsigned getJumpBufAlignment() const
Returns the target's jmp_buf alignment in bytes.
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
virtual bool isFPVectorizationPotentiallyUnsafe()=0
Result run(const Function &F, FunctionAnalysisManager &)
could "use" a pointer
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type * > ParamTys)=0
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
virtual int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)=0
bool isLegalMaskedLoad(Type *DataType) const
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
int getCallCost(FunctionType *FTy, int NumArgs=-1) const
Estimate the cost of a function call when lowered.
virtual bool isTypeLegal(Type *Ty)=0
unsigned getPrefetchDistance() const
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)=0
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace) const
TargetIRAnalysis(TargetIRAnalysis &&Arg)
#define I(x, y, z)
Definition: MD5.cpp:54
virtual int getUserCost(const User *U)=0
virtual bool isSourceOfDivergence(const Value *V)=0
virtual bool enableInterleavedAccessVectorization()=0
TargetCostConstants
Underlying constants for 'cost' values in this interface.
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
virtual int getCallCost(FunctionType *FTy, int NumArgs)=0
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
InsertSubvector. Index indicates start offset.
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:525
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
const unsigned Kind
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The cost of a typical 'add' instruction.
TargetTransformInfo Result
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
LLVM Value Representation.
Definition: Value.h:71
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src)=0
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Broadcast element 0 to all other elements.
unsigned getNumberOfRegisters(bool Vector) const
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
bool UpperBound
Allow using trip count upper bound to unroll loops.
print Print MemDeps of function
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
int getCFInstrCost(unsigned Opcode) const
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:168
OperandValueKind
Additional information about an operand's possible values.
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const =0
A container for analyses that lazily runs them and caches their results.
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
virtual unsigned getNumberOfParts(Type *Tp)=0
TargetTransformInfo & getTTI(const Function &F)
int * Ptr
This header defines various interfaces for pass management in LLVM.
int getUserCost(const User *U) const
Estimate the cost of a given IR user when lowered.
virtual unsigned getJumpBufAlignment()=0
unsigned getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Information about a load/store intrinsic defined by the target.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
The cost of a 'div' instruction on x86.
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=nullptr) const
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args)=0
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:64
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >()) const
virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
virtual int getIntImmCost(const APInt &Imm, Type *Ty)=0
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of single source vector with any shuffle mask.