LLVM  15.0.0git
TargetTransformInfo.h
Go to the documentation of this file.
1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
25 #include "llvm/IR/FMF.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/Pass.h"
32 #include <functional>
33 #include <utility>
34 
35 namespace llvm {
36 
37 namespace Intrinsic {
38 typedef unsigned ID;
39 }
40 
41 class AssumptionCache;
42 class BlockFrequencyInfo;
43 class DominatorTree;
44 class BranchInst;
45 class CallBase;
46 class Function;
47 class GlobalValue;
48 class InstCombiner;
50 class IntrinsicInst;
51 class LoadInst;
52 class LoopAccessInfo;
53 class Loop;
54 class LoopInfo;
55 class ProfileSummaryInfo;
57 class SCEV;
58 class ScalarEvolution;
59 class StoreInst;
60 class SwitchInst;
61 class TargetLibraryInfo;
62 class Type;
63 class User;
64 class Value;
65 class VPIntrinsic;
66 struct KnownBits;
67 template <typename T> class Optional;
68 
69 /// Information about a load/store intrinsic defined by the target.
71  /// This is the pointer that the intrinsic is loading from or storing to.
72  /// If this is non-null, then analysis/optimization passes can assume that
73  /// this intrinsic is functionally equivalent to a load/store from this
74  /// pointer.
75  Value *PtrVal = nullptr;
76 
77  // Ordering for atomic operations.
79 
80  // Same Id is set by the target for corresponding load/store intrinsics.
81  unsigned short MatchingId = 0;
82 
83  bool ReadMem = false;
84  bool WriteMem = false;
85  bool IsVolatile = false;
86 
87  bool isUnordered() const {
90  !IsVolatile;
91  }
92 };
93 
94 /// Attributes of a target dependent hardware loop.
96  HardwareLoopInfo() = delete;
98  Loop *L = nullptr;
99  BasicBlock *ExitBlock = nullptr;
100  BranchInst *ExitBranch = nullptr;
101  const SCEV *ExitCount = nullptr;
102  IntegerType *CountType = nullptr;
103  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
104  // value in every iteration.
105  bool IsNestingLegal = false; // Can a hardware loop be a parent to
106  // another hardware loop?
107  bool CounterInReg = false; // Should loop counter be updated in
108  // the loop via a phi?
109  bool PerformEntryTest = false; // Generate the intrinsic which also performs
110  // icmp ne zero on the loop counter value and
111  // produces an i1 to guard the loop entry.
113  DominatorTree &DT, bool ForceNestedLoop = false,
114  bool ForceHardwareLoopPHI = false);
115  bool canAnalyze(LoopInfo &LI);
116 };
117 
119  const IntrinsicInst *II = nullptr;
120  Type *RetTy = nullptr;
121  Intrinsic::ID IID;
122  SmallVector<Type *, 4> ParamTys;
124  FastMathFlags FMF;
125  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
126  // arguments and the return value will be computed based on types.
127  InstructionCost ScalarizationCost = InstructionCost::getInvalid();
128 
129 public:
131  Intrinsic::ID Id, const CallBase &CI,
133 
136  FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
138 
141 
145  const IntrinsicInst *I = nullptr,
147 
148  Intrinsic::ID getID() const { return IID; }
149  const IntrinsicInst *getInst() const { return II; }
150  Type *getReturnType() const { return RetTy; }
151  FastMathFlags getFlags() const { return FMF; }
152  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
154  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
155 
156  bool isTypeBasedOnly() const {
157  return Arguments.empty();
158  }
159 
160  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
161 };
162 
164 typedef TargetTransformInfo TTI;
165 
166 /// This pass provides access to the codegen interfaces that are needed
167 /// for IR-level transformations.
169 public:
170  /// Construct a TTI object using a type implementing the \c Concept
171  /// API below.
172  ///
173  /// This is used by targets to construct a TTI wrapping their target-specific
174  /// implementation that encodes appropriate costs for their target.
175  template <typename T> TargetTransformInfo(T Impl);
176 
177  /// Construct a baseline TTI object using a minimal implementation of
178  /// the \c Concept API below.
179  ///
180  /// The TTI implementation will reflect the information in the DataLayout
181  /// provided if non-null.
182  explicit TargetTransformInfo(const DataLayout &DL);
183 
184  // Provide move semantics.
187 
188  // We need to define the destructor out-of-line to define our sub-classes
189  // out-of-line.
191 
192  /// Handle the invalidation of this information.
193  ///
194  /// When used as a result of \c TargetIRAnalysis this method will be called
195  /// when the function this was computed for changes. When it returns false,
196  /// the information is preserved across those changes.
199  // FIXME: We should probably in some way ensure that the subtarget
200  // information for a function hasn't changed.
201  return false;
202  }
203 
204  /// \name Generic Target Information
205  /// @{
206 
207  /// The kind of cost model.
208  ///
209  /// There are several different cost models that can be customized by the
210  /// target. The normalization of each cost model may be target specific.
212  TCK_RecipThroughput, ///< Reciprocal throughput.
213  TCK_Latency, ///< The latency of instruction.
214  TCK_CodeSize, ///< Instruction code size.
215  TCK_SizeAndLatency ///< The weighted sum of size and latency.
216  };
217 
218  /// Query the cost of a specified instruction.
219  ///
220  /// Clients should use this interface to query the cost of an existing
221  /// instruction. The instruction must have a valid parent (basic block).
222  ///
223  /// Note, this method does not cache the cost calculation and it
224  /// can be expensive in some cases.
226  enum TargetCostKind kind) const {
227  InstructionCost Cost;
228  switch (kind) {
229  case TCK_RecipThroughput:
230  Cost = getInstructionThroughput(I);
231  break;
232  case TCK_Latency:
233  Cost = getInstructionLatency(I);
234  break;
235  case TCK_CodeSize:
236  case TCK_SizeAndLatency:
237  Cost = getUserCost(I, kind);
238  break;
239  }
240  return Cost;
241  }
242 
243  /// Underlying constants for 'cost' values in this interface.
244  ///
245  /// Many APIs in this interface return a cost. This enum defines the
246  /// fundamental values that should be used to interpret (and produce) those
247  /// costs. The costs are returned as an int rather than a member of this
248  /// enumeration because it is expected that the cost of one IR instruction
249  /// may have a multiplicative factor to it or otherwise won't fit directly
250  /// into the enum. Moreover, it is common to sum or average costs which works
251  /// better as simple integral values. Thus this enum only provides constants.
252  /// Also note that the returned costs are signed integers to make it natural
253  /// to add, subtract, and test with zero (a common boundary condition). It is
254  /// not expected that 2^32 is a realistic cost to be modeling at any point.
255  ///
256  /// Note that these costs should usually reflect the intersection of code-size
257  /// cost and execution cost. A free instruction is typically one that folds
258  /// into another instruction. For example, reg-to-reg moves can often be
259  /// skipped by renaming the registers in the CPU, but they still are encoded
260  /// and thus wouldn't be considered 'free' here.
262  TCC_Free = 0, ///< Expected to fold away in lowering.
263  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
264  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
265  };
266 
267  /// Estimate the cost of a GEP operation when lowered.
269  getGEPCost(Type *PointeeType, const Value *Ptr,
272 
273  /// \returns A value by which our inlining threshold should be multiplied.
274  /// This is primarily used to bump up the inlining threshold wholesale on
275  /// targets where calls are unusually expensive.
276  ///
277  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
278  /// individual classes of instructions would be better.
279  unsigned getInliningThresholdMultiplier() const;
280 
281  /// \returns A value to be added to the inlining threshold.
282  unsigned adjustInliningThreshold(const CallBase *CB) const;
283 
284  /// \returns Vector bonus in percent.
285  ///
286  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
287  /// and apply this bonus based on the percentage of vector instructions. A
288  /// bonus is applied if the vector instructions exceed 50% and half that
289  /// amount is applied if it exceeds 10%. Note that these bonuses are some what
290  /// arbitrary and evolved over time by accident as much as because they are
291  /// principled bonuses.
292  /// FIXME: It would be nice to base the bonus values on something more
293  /// scientific. A target may has no bonus on vector instructions.
294  int getInlinerVectorBonusPercent() const;
295 
296  /// \return the expected cost of a memcpy, which could e.g. depend on the
297  /// source/destination type and alignment and the number of bytes copied.
299 
300  /// \return The estimated number of case clusters when lowering \p 'SI'.
301  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
302  /// table.
304  unsigned &JTSize,
305  ProfileSummaryInfo *PSI,
306  BlockFrequencyInfo *BFI) const;
307 
308  /// Estimate the cost of a given IR user when lowered.
309  ///
310  /// This can estimate the cost of either a ConstantExpr or Instruction when
311  /// lowered.
312  ///
313  /// \p Operands is a list of operands which can be a result of transformations
314  /// of the current operands. The number of the operands on the list must equal
315  /// to the number of the current operands the IR user has. Their order on the
316  /// list must be the same as the order of the current operands the IR user
317  /// has.
318  ///
319  /// The returned cost is defined in terms of \c TargetCostConstants, see its
320  /// comments for a detailed explanation of the cost values.
322  TargetCostKind CostKind) const;
323 
324  /// This is a helper function which calls the two-argument getUserCost
325  /// with \p Operands which are the current operands U has.
328  return getUserCost(U, Operands, CostKind);
329  }
330 
331  /// If a branch or a select condition is skewed in one direction by more than
332  /// this factor, it is very likely to be predicted correctly.
334 
335  /// Return true if branch divergence exists.
336  ///
337  /// Branch divergence has a significantly negative impact on GPU performance
338  /// when threads in the same wavefront take different paths due to conditional
339  /// branches.
340  bool hasBranchDivergence() const;
341 
342  /// Return true if the target prefers to use GPU divergence analysis to
343  /// replace the legacy version.
344  bool useGPUDivergenceAnalysis() const;
345 
346  /// Returns whether V is a source of divergence.
347  ///
348  /// This function provides the target-dependent information for
349  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
350  /// first builds the dependency graph, and then runs the reachability
351  /// algorithm starting with the sources of divergence.
352  bool isSourceOfDivergence(const Value *V) const;
353 
354  // Returns true for the target specific
355  // set of operations which produce uniform result
356  // even taking non-uniform arguments
357  bool isAlwaysUniform(const Value *V) const;
358 
359  /// Returns the address space ID for a target's 'flat' address space. Note
360  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
361  /// refers to as the generic address space. The flat address space is a
362  /// generic address space that can be used access multiple segments of memory
363  /// with different address spaces. Access of a memory location through a
364  /// pointer with this address space is expected to be legal but slower
365  /// compared to the same memory location accessed through a pointer with a
366  /// different address space.
367  //
368  /// This is for targets with different pointer representations which can
369  /// be converted with the addrspacecast instruction. If a pointer is converted
370  /// to this address space, optimizations should attempt to replace the access
371  /// with the source address space.
372  ///
373  /// \returns ~0u if the target does not have such a flat address space to
374  /// optimize away.
375  unsigned getFlatAddressSpace() const;
376 
377  /// Return any intrinsic address operand indexes which may be rewritten if
378  /// they use a flat address space pointer.
379  ///
380  /// \returns true if the intrinsic was handled.
382  Intrinsic::ID IID) const;
383 
384  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
385 
386  /// Return true if globals in this address space can have initializers other
387  /// than `undef`.
388  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
389 
390  unsigned getAssumedAddrSpace(const Value *V) const;
391 
392  std::pair<const Value *, unsigned>
393  getPredicatedAddrSpace(const Value *V) const;
394 
395  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
396  /// NewV, which has a different address space. This should happen for every
397  /// operand index that collectFlatAddressOperands returned for the intrinsic.
398  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
399  /// new value (which may be the original \p II with modified operands).
401  Value *NewV) const;
402 
403  /// Test whether calls to a function lower to actual program function
404  /// calls.
405  ///
406  /// The idea is to test whether the program is likely to require a 'call'
407  /// instruction or equivalent in order to call the given function.
408  ///
409  /// FIXME: It's not clear that this is a good or useful query API. Client's
410  /// should probably move to simpler cost metrics using the above.
411  /// Alternatively, we could split the cost interface into distinct code-size
412  /// and execution-speed costs. This would allow modelling the core of this
413  /// query more accurately as a call is a single small instruction, but
414  /// incurs significant execution cost.
415  bool isLoweredToCall(const Function *F) const;
416 
417  struct LSRCost {
418  /// TODO: Some of these could be merged. Also, a lexical ordering
419  /// isn't always optimal.
420  unsigned Insns;
421  unsigned NumRegs;
422  unsigned AddRecCost;
423  unsigned NumIVMuls;
424  unsigned NumBaseAdds;
425  unsigned ImmCost;
426  unsigned SetupCost;
427  unsigned ScaleCost;
428  };
429 
430  /// Parameters that control the generic loop unrolling transformation.
432  /// The cost threshold for the unrolled loop. Should be relative to the
433  /// getUserCost values returned by this API, and the expectation is that
434  /// the unrolled loop's instructions when run through that interface should
435  /// not exceed this cost. However, this is only an estimate. Also, specific
436  /// loops may be unrolled even with a cost above this threshold if deemed
437  /// profitable. Set this to UINT_MAX to disable the loop body cost
438  /// restriction.
439  unsigned Threshold;
440  /// If complete unrolling will reduce the cost of the loop, we will boost
441  /// the Threshold by a certain percent to allow more aggressive complete
442  /// unrolling. This value provides the maximum boost percentage that we
443  /// can apply to Threshold (The value should be no less than 100).
444  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
445  /// MaxPercentThresholdBoost / 100)
446  /// E.g. if complete unrolling reduces the loop execution time by 50%
447  /// then we boost the threshold by the factor of 2x. If unrolling is not
448  /// expected to reduce the running time, then we do not increase the
449  /// threshold.
451  /// The cost threshold for the unrolled loop when optimizing for size (set
452  /// to UINT_MAX to disable).
454  /// The cost threshold for the unrolled loop, like Threshold, but used
455  /// for partial/runtime unrolling (set to UINT_MAX to disable).
457  /// The cost threshold for the unrolled loop when optimizing for size, like
458  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
459  /// UINT_MAX to disable).
461  /// A forced unrolling factor (the number of concatenated bodies of the
462  /// original loop in the unrolled loop body). When set to 0, the unrolling
463  /// transformation will select an unrolling factor based on the current cost
464  /// threshold and other factors.
465  unsigned Count;
466  /// Default unroll count for loops with run-time trip count.
468  // Set the maximum unrolling factor. The unrolling factor may be selected
469  // using the appropriate cost threshold, but may not exceed this number
470  // (set to UINT_MAX to disable). This does not apply in cases where the
471  // loop is being fully unrolled.
472  unsigned MaxCount;
473  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
474  /// applies even if full unrolling is selected. This allows a target to fall
475  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
477  // Represents number of instructions optimized when "back edge"
478  // becomes "fall through" in unrolled loop.
479  // For now we count a conditional branch on a backedge and a comparison
480  // feeding it.
481  unsigned BEInsns;
482  /// Allow partial unrolling (unrolling of loops to expand the size of the
483  /// loop body, not only to eliminate small constant-trip-count loops).
484  bool Partial;
485  /// Allow runtime unrolling (unrolling of loops to expand the size of the
486  /// loop body even when the number of loop iterations is not known at
487  /// compile time).
488  bool Runtime;
489  /// Allow generation of a loop remainder (extra iterations after unroll).
491  /// Allow emitting expensive instructions (such as divisions) when computing
492  /// the trip count of a loop for runtime unrolling.
494  /// Apply loop unroll on any kind of loop
495  /// (mainly to loops that fail runtime unrolling).
496  bool Force;
497  /// Allow using trip count upper bound to unroll loops.
499  /// Allow unrolling of all the iterations of the runtime loop remainder.
501  /// Allow unroll and jam. Used to enable unroll and jam for the target.
503  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
504  /// value above is used during unroll and jam for the outer loop size.
505  /// This value is used in the same manner to limit the size of the inner
506  /// loop.
508  /// Don't allow loop unrolling to simulate more than this number of
509  /// iterations when checking full unroll profitability
511  };
512 
513  /// Get target-customized preferences for the generic loop unrolling
514  /// transformation. The caller will initialize UP with the current
515  /// target-independent defaults.
518  OptimizationRemarkEmitter *ORE) const;
519 
520  /// Query the target whether it would be profitable to convert the given loop
521  /// into a hardware loop.
523  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
524  HardwareLoopInfo &HWLoopInfo) const;
525 
526  /// Query the target whether it would be prefered to create a predicated
527  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
530  DominatorTree *DT,
531  const LoopAccessInfo *LAI) const;
532 
533  /// Query the target whether lowering of the llvm.get.active.lane.mask
534  /// intrinsic is supported.
535  bool emitGetActiveLaneMask() const;
536 
537  // Parameters that control the loop peeling transformation
539  /// A forced peeling factor (the number of bodied of the original loop
540  /// that should be peeled off before the loop body). When set to 0, the
541  /// a peeling factor based on profile information and other factors.
542  unsigned PeelCount;
543  /// Allow peeling off loop iterations.
545  /// Allow peeling off loop iterations for loop nests.
547  /// Allow peeling basing on profile. Uses to enable peeling off all
548  /// iterations basing on provided profile.
549  /// If the value is true the peeling cost model can decide to peel only
550  /// some iterations and in this case it will set this to false.
552  };
553 
554  /// Get target-customized preferences for the generic loop peeling
555  /// transformation. The caller will initialize \p PP with the current
556  /// target-independent defaults with information from \p L and \p SE.
558  PeelingPreferences &PP) const;
559 
560  /// Targets can implement their own combinations for target-specific
561  /// intrinsics. This function will be called from the InstCombine pass every
562  /// time a target-specific intrinsic is encountered.
563  ///
564  /// \returns None to not do anything target specific or a value that will be
565  /// returned from the InstCombiner. It is possible to return null and stop
566  /// further processing of the intrinsic by returning nullptr.
568  IntrinsicInst &II) const;
569  /// Can be used to implement target-specific instruction combining.
570  /// \see instCombineIntrinsic
573  APInt DemandedMask, KnownBits &Known,
574  bool &KnownBitsComputed) const;
575  /// Can be used to implement target-specific instruction combining.
576  /// \see instCombineIntrinsic
578  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
579  APInt &UndefElts2, APInt &UndefElts3,
580  std::function<void(Instruction *, unsigned, APInt, APInt &)>
581  SimplifyAndSetOp) const;
582  /// @}
583 
584  /// \name Scalar Target Information
585  /// @{
586 
587  /// Flags indicating the kind of support for population count.
588  ///
589  /// Compared to the SW implementation, HW support is supposed to
590  /// significantly boost the performance when the population is dense, and it
591  /// may or may not degrade performance if the population is sparse. A HW
592  /// support is considered as "Fast" if it can outperform, or is on a par
593  /// with, SW implementation when the population is sparse; otherwise, it is
594  /// considered as "Slow".
596 
597  /// Return true if the specified immediate is legal add immediate, that
598  /// is the target has add instructions which can add a register with the
599  /// immediate without having to materialize the immediate into a register.
600  bool isLegalAddImmediate(int64_t Imm) const;
601 
602  /// Return true if the specified immediate is legal icmp immediate,
603  /// that is the target has icmp instructions which can compare a register
604  /// against the immediate without having to materialize the immediate into a
605  /// register.
606  bool isLegalICmpImmediate(int64_t Imm) const;
607 
608  /// Return true if the addressing mode represented by AM is legal for
609  /// this target, for a load/store of the specified type.
610  /// The type may be VoidTy, in which case only return true if the addressing
611  /// mode is legal for a load/store of any legal type.
612  /// If target returns true in LSRWithInstrQueries(), I may be valid.
613  /// TODO: Handle pre/postinc as well.
614  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
615  bool HasBaseReg, int64_t Scale,
616  unsigned AddrSpace = 0,
617  Instruction *I = nullptr) const;
618 
619  /// Return true if LSR cost of C1 is lower than C1.
621  const TargetTransformInfo::LSRCost &C2) const;
622 
623  /// Return true if LSR major cost is number of registers. Targets which
624  /// implement their own isLSRCostLess and unset number of registers as major
625  /// cost should return false, otherwise return true.
626  bool isNumRegsMajorCostOfLSR() const;
627 
628  /// \returns true if LSR should not optimize a chain that includes \p I.
630 
631  /// Return true if the target can fuse a compare and branch.
632  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
633  /// calculation for the instructions in a loop.
634  bool canMacroFuseCmp() const;
635 
636  /// Return true if the target can save a compare for loop count, for example
637  /// hardware loop saves a compare.
638  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
640  TargetLibraryInfo *LibInfo) const;
641 
646  };
647 
648  /// Return the preferred addressing mode LSR should make efforts to generate.
650  ScalarEvolution *SE) const;
651 
652  /// Return true if the target supports masked store.
653  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
654  /// Return true if the target supports masked load.
655  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
656 
657  /// Return true if the target supports nontemporal store.
658  bool isLegalNTStore(Type *DataType, Align Alignment) const;
659  /// Return true if the target supports nontemporal load.
660  bool isLegalNTLoad(Type *DataType, Align Alignment) const;
661 
662  /// \Returns true if the target supports broadcasting a load to a vector of
663  /// type <NumElements x ElementTy>.
664  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
665 
666  /// Return true if the target supports masked scatter.
667  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
668  /// Return true if the target supports masked gather.
669  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
670  /// Return true if the target forces scalarizing of llvm.masked.gather
671  /// intrinsics.
672  bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
673  /// Return true if the target forces scalarizing of llvm.masked.scatter
674  /// intrinsics.
675  bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
676 
677  /// Return true if the target supports masked compress store.
678  bool isLegalMaskedCompressStore(Type *DataType) const;
679  /// Return true if the target supports masked expand load.
680  bool isLegalMaskedExpandLoad(Type *DataType) const;
681 
682  /// Return true if this is an alternating opcode pattern that can be lowered
683  /// to a single instruction on the target. In X86 this is for the addsub
684  /// instruction which corrsponds to a Shuffle + Fadd + FSub pattern in IR.
685  /// This function expectes two opcodes: \p Opcode1 and \p Opcode2 being
686  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
687  /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
688  /// \p VecTy is the vector type of the instruction to be generated.
689  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
690  const SmallBitVector &OpcodeMask) const;
691 
692  /// Return true if we should be enabling ordered reductions for the target.
693  bool enableOrderedReductions() const;
694 
695  /// Return true if the target has a unified operation to calculate division
696  /// and remainder. If so, the additional implicit multiplication and
697  /// subtraction required to calculate a remainder from division are free. This
698  /// can enable more aggressive transformations for division and remainder than
699  /// would typically be allowed using throughput or size cost models.
700  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
701 
702  /// Return true if the given instruction (assumed to be a memory access
703  /// instruction) has a volatile variant. If that's the case then we can avoid
704  /// addrspacecast to generic AS for volatile loads/stores. Default
705  /// implementation returns false, which prevents address space inference for
706  /// volatile loads/stores.
707  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
708 
709  /// Return true if target doesn't mind addresses in vectors.
710  bool prefersVectorizedAddressing() const;
711 
712  /// Return the cost of the scaling factor used in the addressing
713  /// mode represented by AM for this target, for a load/store
714  /// of the specified type.
715  /// If the AM is supported, the return value must be >= 0.
716  /// If the AM is not supported, it returns a negative value.
717  /// TODO: Handle pre/postinc as well.
719  int64_t BaseOffset, bool HasBaseReg,
720  int64_t Scale,
721  unsigned AddrSpace = 0) const;
722 
723  /// Return true if the loop strength reduce pass should make
724  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
725  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
726  /// immediate offset and no index register.
727  bool LSRWithInstrQueries() const;
728 
729  /// Return true if it's free to truncate a value of type Ty1 to type
730  /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
731  /// by referencing its sub-register AX.
732  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
733 
734  /// Return true if it is profitable to hoist instruction in the
735  /// then/else to before if.
736  bool isProfitableToHoist(Instruction *I) const;
737 
738  bool useAA() const;
739 
740  /// Return true if this type is legal.
741  bool isTypeLegal(Type *Ty) const;
742 
743  /// Returns the estimated number of registers required to represent \p Ty.
744  unsigned getRegUsageForType(Type *Ty) const;
745 
746  /// Return true if switches should be turned into lookup tables for the
747  /// target.
748  bool shouldBuildLookupTables() const;
749 
750  /// Return true if switches should be turned into lookup tables
751  /// containing this constant value for the target.
753 
754  /// Return true if lookup tables should be turned into relative lookup tables.
755  bool shouldBuildRelLookupTables() const;
756 
757  /// Return true if the input function which is cold at all call sites,
758  /// should use coldcc calling convention.
759  bool useColdCCForColdCall(Function &F) const;
760 
761  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
762  /// are set if the demanded result elements need to be inserted and/or
763  /// extracted from vectors.
765  const APInt &DemandedElts,
766  bool Insert, bool Extract) const;
767 
768  /// Estimate the overhead of scalarizing an instructions unique
769  /// non-constant operands. The (potentially vector) types to use for each of
770  /// argument are passes via Tys.
772  ArrayRef<Type *> Tys) const;
773 
774  /// If target has efficient vector element load/store instructions, it can
775  /// return true here so that insertion/extraction costs are not added to
776  /// the scalarization cost of a load/store.
778 
779  /// If the target supports tail calls.
780  bool supportsTailCalls() const;
781 
782  /// Don't restrict interleaved unrolling to small loops.
783  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
784 
785  /// Returns options for expansion of memcmp. IsZeroCmp is
786  // true if this is the expansion of memcmp(p1, p2, s) == 0.
788  // Return true if memcmp expansion is enabled.
789  operator bool() const { return MaxNumLoads > 0; }
790 
791  // Maximum number of load operations.
792  unsigned MaxNumLoads = 0;
793 
794  // The list of available load sizes (in bytes), sorted in decreasing order.
796 
797  // For memcmp expansion when the memcmp result is only compared equal or
798  // not-equal to 0, allow up to this number of load pairs per block. As an
799  // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
800  // a0 = load2bytes &a[0]
801  // b0 = load2bytes &b[0]
802  // a2 = load1byte &a[2]
803  // b2 = load1byte &b[2]
804  // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
805  unsigned NumLoadsPerBlock = 1;
806 
807  // Set to true to allow overlapping loads. For example, 7-byte compares can
808  // be done with two 4-byte compares instead of 4+2+1-byte compares. This
809  // requires all loads in LoadSizes to be doable in an unaligned way.
810  bool AllowOverlappingLoads = false;
811  };
813  bool IsZeroCmp) const;
814 
815  /// Enable matching of interleaved access groups.
817 
818  /// Enable matching of interleaved access groups that contain predicated
819  /// accesses or gaps and therefore vectorized using masked
820  /// vector loads/stores.
822 
823  /// Indicate that it is potentially unsafe to automatically vectorize
824  /// floating-point operations because the semantics of vector and scalar
825  /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
826  /// does not support IEEE-754 denormal numbers, while depending on the
827  /// platform, scalar floating-point math does.
828  /// This applies to floating-point math operations and calls, not memory
829  /// operations, shuffles, or casts.
831 
832  /// Determine if the target supports unaligned memory accesses.
834  unsigned AddressSpace = 0,
835  Align Alignment = Align(1),
836  bool *Fast = nullptr) const;
837 
838  /// Return hardware support for population count.
839  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
840 
841  /// Return true if the hardware has a fast square-root instruction.
842  bool haveFastSqrt(Type *Ty) const;
843 
844  /// Return true if it is faster to check if a floating-point value is NaN
845  /// (or not-NaN) versus a comparison against a constant FP zero value.
846  /// Targets should override this if materializing a 0.0 for comparison is
847  /// generally as cheap as checking for ordered/unordered.
848  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
849 
850  /// Return the expected cost of supporting the floating point operation
851  /// of the specified type.
852  InstructionCost getFPOpCost(Type *Ty) const;
853 
854  /// Return the expected cost of materializing for the given integer
855  /// immediate of the specified type.
857  TargetCostKind CostKind) const;
858 
859  /// Return the expected cost of materialization for the given integer
860  /// immediate of the specified type for a given instruction. The cost can be
861  /// zero if the immediate can be folded into the specified instruction.
862  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
863  const APInt &Imm, Type *Ty,
865  Instruction *Inst = nullptr) const;
867  const APInt &Imm, Type *Ty,
868  TargetCostKind CostKind) const;
869 
870  /// Return the expected cost for the given integer when optimising
871  /// for size. This is different than the other integer immediate cost
872  /// functions in that it is subtarget agnostic. This is useful when you e.g.
873  /// target one ISA such as Aarch32 but smaller encodings could be possible
874  /// with another such as Thumb. This return value is used as a penalty when
875  /// the total costs for a constant is calculated (the bigger the cost, the
876  /// more beneficial constant hoisting is).
877  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
878  const APInt &Imm, Type *Ty) const;
879  /// @}
880 
881  /// \name Vector Target Information
882  /// @{
883 
884  /// The various kinds of shuffle patterns for vector queries.
885  enum ShuffleKind {
886  SK_Broadcast, ///< Broadcast element 0 to all other elements.
887  SK_Reverse, ///< Reverse the order of the vector.
888  SK_Select, ///< Selects elements from the corresponding lane of
889  ///< either source operand. This is equivalent to a
890  ///< vector select with a constant condition operand.
891  SK_Transpose, ///< Transpose two vectors.
892  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
893  SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
894  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
895  ///< with any shuffle mask.
896  SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
897  ///< shuffle mask.
898  SK_Splice ///< Concatenates elements from the first input vector
899  ///< with elements of the second input vector. Returning
900  ///< a vector of the same type as the input vectors.
901  };
902 
903  /// Additional information about an operand's possible values.
905  OK_AnyValue, // Operand can have any value.
906  OK_UniformValue, // Operand is uniform (splat of a value).
907  OK_UniformConstantValue, // Operand is uniform constant.
908  OK_NonUniformConstantValue // Operand is a non uniform constant value.
909  };
910 
911  /// Additional properties of an operand's values.
913 
914  /// \return the number of registers in the target-provided register class.
915  unsigned getNumberOfRegisters(unsigned ClassID) const;
916 
917  /// \return the target-provided register class ID for the provided type,
918  /// accounting for type promotion and other type-legalization techniques that
919  /// the target might apply. However, it specifically does not account for the
920  /// scalarization or splitting of vector types. Should a vector type require
921  /// scalarization or splitting into multiple underlying vector registers, that
922  /// type should be mapped to a register class containing no registers.
923  /// Specifically, this is designed to provide a simple, high-level view of the
924  /// register allocation later performed by the backend. These register classes
925  /// don't necessarily map onto the register classes used by the backend.
926  /// FIXME: It's not currently possible to determine how many registers
927  /// are used by the provided type.
928  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
929 
930  /// \return the target-provided register class name
931  const char *getRegisterClassName(unsigned ClassID) const;
932 
934 
935  /// \return The width of the largest scalar or vector register type.
937 
938  /// \return The width of the smallest vector register type.
939  unsigned getMinVectorRegisterBitWidth() const;
940 
941  /// \return The maximum value of vscale if the target specifies an
942  /// architectural maximum vector length, and None otherwise.
944 
945  /// \return the value of vscale to tune the cost model for.
947 
948  /// \return True if the vectorization factor should be chosen to
949  /// make the vector of the smallest element type match the size of a
950  /// vector register. For wider element types, this could result in
951  /// creating vectors that span multiple vector registers.
952  /// If false, the vectorization factor will be chosen based on the
953  /// size of the widest element type.
954  /// \p K Register Kind for vectorization.
956 
957  /// \return The minimum vectorization factor for types of given element
958  /// bit width, or 0 if there is no minimum VF. The returned value only
959  /// applies when shouldMaximizeVectorBandwidth returns true.
960  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
961  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
962 
963  /// \return The maximum vectorization factor for types of given element
964  /// bit width and opcode, or 0 if there is no maximum VF.
965  /// Currently only used by the SLP vectorizer.
966  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
967 
968  /// \return The minimum vectorization factor for the store instruction. Given
969  /// the initial estimation of the minimum vector factor and store value type,
970  /// it tries to find possible lowest VF, which still might be profitable for
971  /// the vectorization.
972  /// \param VF Initial estimation of the minimum vector factor.
973  /// \param ScalarMemTy Scalar memory type of the store operation.
974  /// \param ScalarValTy Scalar type of the stored value.
975  /// Currently only used by the SLP vectorizer.
976  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
977  Type *ScalarValTy) const;
978 
979  /// \return True if it should be considered for address type promotion.
980  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
981  /// profitable without finding other extensions fed by the same input.
983  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
984 
985  /// \return The size of a cache line in bytes.
986  unsigned getCacheLineSize() const;
987 
988  /// The possible cache levels
989  enum class CacheLevel {
990  L1D, // The L1 data cache
991  L2D, // The L2 data cache
992 
993  // We currently do not model L3 caches, as their sizes differ widely between
994  // microarchitectures. Also, we currently do not have a use for L3 cache
995  // size modeling yet.
996  };
997 
998  /// \return The size of the cache level in bytes, if available.
1000 
1001  /// \return The associativity of the cache level, if available.
1003 
1004  /// \return How much before a load we should place the prefetch
1005  /// instruction. This is currently measured in number of
1006  /// instructions.
1007  unsigned getPrefetchDistance() const;
1008 
1009  /// Some HW prefetchers can handle accesses up to a certain constant stride.
1010  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1011  /// and the arguments provided are meant to serve as a basis for deciding this
1012  /// for a particular loop.
1013  ///
1014  /// \param NumMemAccesses Number of memory accesses in the loop.
1015  /// \param NumStridedMemAccesses Number of the memory accesses that
1016  /// ScalarEvolution could find a known stride
1017  /// for.
1018  /// \param NumPrefetches Number of software prefetches that will be
1019  /// emitted as determined by the addresses
1020  /// involved and the cache line size.
1021  /// \param HasCall True if the loop contains a call.
1022  ///
1023  /// \return This is the minimum stride in bytes where it makes sense to start
1024  /// adding SW prefetches. The default is 1, i.e. prefetch with any
1025  /// stride.
1026  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1027  unsigned NumStridedMemAccesses,
1028  unsigned NumPrefetches, bool HasCall) const;
1029 
1030  /// \return The maximum number of iterations to prefetch ahead. If
1031  /// the required number of iterations is more than this number, no
1032  /// prefetching is performed.
1033  unsigned getMaxPrefetchIterationsAhead() const;
1034 
1035  /// \return True if prefetching should also be done for writes.
1036  bool enableWritePrefetching() const;
1037 
1038  /// \return The maximum interleave factor that any transform should try to
1039  /// perform for this target. This number depends on the level of parallelism
1040  /// and the number of execution units in the CPU.
1041  unsigned getMaxInterleaveFactor(unsigned VF) const;
1042 
1043  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1044  static OperandValueKind getOperandInfo(const Value *V,
1045  OperandValueProperties &OpProps);
1046 
1047  /// This is an approximation of reciprocal throughput of a math/logic op.
1048  /// A higher cost indicates less expected throughput.
1049  /// From Agner Fog's guides, reciprocal throughput is "the average number of
1050  /// clock cycles per instruction when the instructions are not part of a
1051  /// limiting dependency chain."
1052  /// Therefore, costs should be scaled to account for multiple execution units
1053  /// on the target that can process this type of instruction. For example, if
1054  /// there are 5 scalar integer units and 2 vector integer units that can
1055  /// calculate an 'add' in a single cycle, this model should indicate that the
1056  /// cost of the vector add instruction is 2.5 times the cost of the scalar
1057  /// add instruction.
1058  /// \p Args is an optional argument which holds the instruction operands
1059  /// values so the TTI can analyze those values searching for special
1060  /// cases or optimizations based on those values.
1061  /// \p CxtI is the optional original context instruction, if one exists, to
1062  /// provide even more information.
1064  unsigned Opcode, Type *Ty,
1066  OperandValueKind Opd1Info = OK_AnyValue,
1067  OperandValueKind Opd2Info = OK_AnyValue,
1068  OperandValueProperties Opd1PropInfo = OP_None,
1069  OperandValueProperties Opd2PropInfo = OP_None,
1071  const Instruction *CxtI = nullptr) const;
1072 
1073  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1074  /// The exact mask may be passed as Mask, or else the array will be empty.
1075  /// The index and subtype parameters are used by the subvector insertion and
1076  /// extraction shuffle kinds to show the insert/extract point and the type of
1077  /// the subvector being inserted/extracted. The operands of the shuffle can be
1078  /// passed through \p Args, which helps improve the cost estimation in some
1079  /// cases, like in broadcast loads.
1080  /// NOTE: For subvector extractions Tp represents the source type.
1082  ArrayRef<int> Mask = None, int Index = 0,
1083  VectorType *SubTp = nullptr,
1084  ArrayRef<const Value *> Args = None) const;
1085 
1086  /// Represents a hint about the context in which a cast is used.
1087  ///
1088  /// For zext/sext, the context of the cast is the operand, which must be a
1089  /// load of some kind. For trunc, the context is of the cast is the single
1090  /// user of the instruction, which must be a store of some kind.
1091  ///
1092  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1093  /// type of cast it's dealing with, as not every cast is equal. For instance,
1094  /// the zext of a load may be free, but the zext of an interleaving load can
1095  //// be (very) expensive!
1096  ///
1097  /// See \c getCastContextHint to compute a CastContextHint from a cast
1098  /// Instruction*. Callers can use it if they don't need to override the
1099  /// context and just want it to be calculated from the instruction.
1100  ///
1101  /// FIXME: This handles the types of load/store that the vectorizer can
1102  /// produce, which are the cases where the context instruction is most
1103  /// likely to be incorrect. There are other situations where that can happen
1104  /// too, which might be handled here but in the long run a more general
1105  /// solution of costing multiple instructions at the same times may be better.
1106  enum class CastContextHint : uint8_t {
1107  None, ///< The cast is not used with a load/store of any kind.
1108  Normal, ///< The cast is used with a normal load/store.
1109  Masked, ///< The cast is used with a masked load/store.
1110  GatherScatter, ///< The cast is used with a gather/scatter.
1111  Interleave, ///< The cast is used with an interleaved load/store.
1112  Reversed, ///< The cast is used with a reversed load/store.
1113  };
1114 
1115  /// Calculates a CastContextHint from \p I.
1116  /// This should be used by callers of getCastInstrCost if they wish to
1117  /// determine the context from some instruction.
1118  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1119  /// or if it's another type of cast.
1121 
1122  /// \return The expected cost of cast instructions, such as bitcast, trunc,
1123  /// zext, etc. If there is an existing instruction that holds Opcode, it
1124  /// may be passed in the 'I' parameter.
1126  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1129  const Instruction *I = nullptr) const;
1130 
1131  /// \return The expected cost of a sign- or zero-extended vector extract. Use
1132  /// -1 to indicate that there is no information about the index value.
1133  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1134  VectorType *VecTy,
1135  unsigned Index = -1) const;
1136 
1137  /// \return The expected cost of control-flow related instructions such as
1138  /// Phi, Ret, Br, Switch.
1140  getCFInstrCost(unsigned Opcode,
1142  const Instruction *I = nullptr) const;
1143 
1144  /// \returns The expected cost of compare and select instructions. If there
1145  /// is an existing instruction that holds Opcode, it may be passed in the
1146  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1147  /// is using a compare with the specified predicate as condition. When vector
1148  /// types are passed, \p VecPred must be used for all lanes.
1150  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1151  CmpInst::Predicate VecPred,
1153  const Instruction *I = nullptr) const;
1154 
1155  /// \return The expected cost of vector Insert and Extract.
1156  /// Use -1 to indicate that there is no information on the index value.
1157  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1158  unsigned Index = -1) const;
1159 
1160  /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1161  /// \p ReplicationFactor times.
1162  ///
1163  /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1164  /// <0,0,0,1,1,1,2,2,2,3,3,3>
1165  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1166  int VF,
1167  const APInt &DemandedDstElts,
1169 
1170  /// \return The cost of Load and Store instructions.
1172  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1173  unsigned AddressSpace,
1175  const Instruction *I = nullptr) const;
1176 
1177  /// \return The cost of VP Load and Store instructions.
1179  getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1180  unsigned AddressSpace,
1182  const Instruction *I = nullptr) const;
1183 
1184  /// \return The cost of masked Load and Store instructions.
1186  unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1188 
1189  /// \return The cost of Gather or Scatter operation
1190  /// \p Opcode - is a type of memory access Load or Store
1191  /// \p DataTy - a vector type of the data to be loaded or stored
1192  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1193  /// \p VariableMask - true when the memory access is predicated with a mask
1194  /// that is not a compile-time constant
1195  /// \p Alignment - alignment of single element
1196  /// \p I - the optional original context instruction, if one exists, e.g. the
1197  /// load/store to transform or the call to the gather/scatter intrinsic
1199  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1201  const Instruction *I = nullptr) const;
1202 
1203  /// \return The cost of the interleaved memory operation.
1204  /// \p Opcode is the memory operation code
1205  /// \p VecTy is the vector type of the interleaved access.
1206  /// \p Factor is the interleave factor
1207  /// \p Indices is the indices for interleaved load members (as interleaved
1208  /// load allows gaps)
1209  /// \p Alignment is the alignment of the memory operation
1210  /// \p AddressSpace is address space of the pointer.
1211  /// \p UseMaskForCond indicates if the memory access is predicated.
1212  /// \p UseMaskForGaps indicates if gaps should be masked.
1214  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1215  Align Alignment, unsigned AddressSpace,
1217  bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1218 
1219  /// A helper function to determine the type of reduction algorithm used
1220  /// for a given \p Opcode and set of FastMathFlags \p FMF.
1222  return FMF != None && !(*FMF).allowReassoc();
1223  }
1224 
1225  /// Calculate the cost of vector reduction intrinsics.
1226  ///
1227  /// This is the cost of reducing the vector value of type \p Ty to a scalar
1228  /// value using the operation denoted by \p Opcode. The FastMathFlags
1229  /// parameter \p FMF indicates what type of reduction we are performing:
1230  /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1231  /// involves successively splitting a vector into half and doing the
1232  /// operation on the pair of halves until you have a scalar value. For
1233  /// example:
1234  /// (v0, v1, v2, v3)
1235  /// ((v0+v2), (v1+v3), undef, undef)
1236  /// ((v0+v2+v1+v3), undef, undef, undef)
1237  /// This is the default behaviour for integer operations, whereas for
1238  /// floating point we only do this if \p FMF indicates that
1239  /// reassociation is allowed.
1240  /// 2. Ordered. For a vector with N elements this involves performing N
1241  /// operations in lane order, starting with an initial scalar value, i.e.
1242  /// result = InitVal + v0
1243  /// result = result + v1
1244  /// result = result + v2
1245  /// result = result + v3
1246  /// This is only the case for FP operations and when reassociation is not
1247  /// allowed.
1248  ///
1250  unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
1252 
1254  VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1256 
1257  /// Calculate the cost of an extended reduction pattern, similar to
1258  /// getArithmeticReductionCost of an Add reduction with an extension and
1259  /// optional multiply. This is the cost of as:
1260  /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
1261  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)). The reduction happens
1262  /// on a VectorType with ResTy elements and Ty lanes.
1264  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1266 
1267  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1268  /// Three cases are handled: 1. scalar instruction 2. vector instruction
1269  /// 3. scalar instruction which is to be vectorized.
1272 
1273  /// \returns The cost of Call instructions.
1275  Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1277 
1278  /// \returns The number of pieces into which the provided type must be
1279  /// split during legalization. Zero is returned when the answer is unknown.
1280  unsigned getNumberOfParts(Type *Tp) const;
1281 
1282  /// \returns The cost of the address computation. For most targets this can be
1283  /// merged into the instruction indexing mode. Some targets might want to
1284  /// distinguish between address computation for memory operations on vector
1285  /// types and scalar types. Such targets should override this function.
1286  /// The 'SE' parameter holds pointer for the scalar evolution object which
1287  /// is used in order to get the Ptr step value in case of constant stride.
1288  /// The 'Ptr' parameter holds SCEV of the access pointer.
1290  ScalarEvolution *SE = nullptr,
1291  const SCEV *Ptr = nullptr) const;
1292 
1293  /// \returns The cost, if any, of keeping values of the given types alive
1294  /// over a callsite.
1295  ///
1296  /// Some types may require the use of register classes that do not have
1297  /// any callee-saved registers, so would require a spill and fill.
1299 
1300  /// \returns True if the intrinsic is a supported memory intrinsic. Info
1301  /// will contain additional information - whether the intrinsic may write
1302  /// or read to memory, volatility and the pointer. Info is undefined
1303  /// if false is returned.
1305 
1306  /// \returns The maximum element size, in bytes, for an element
1307  /// unordered-atomic memory intrinsic.
1308  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1309 
1310  /// \returns A value which is the result of the given memory intrinsic. New
1311  /// instructions may be created to extract the result from the given intrinsic
1312  /// memory operation. Returns nullptr if the target cannot create a result
1313  /// from the given intrinsic.
1315  Type *ExpectedType) const;
1316 
1317  /// \returns The type to use in a loop expansion of a memcpy call.
1318  Type *
1320  unsigned SrcAddrSpace, unsigned DestAddrSpace,
1321  unsigned SrcAlign, unsigned DestAlign,
1322  Optional<uint32_t> AtomicElementSize = None) const;
1323 
1324  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1325  /// \param RemainingBytes The number of bytes to copy.
1326  ///
1327  /// Calculates the operand types to use when copying \p RemainingBytes of
1328  /// memory, where source and destination alignments are \p SrcAlign and
1329  /// \p DestAlign respectively.
1332  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1333  unsigned SrcAlign, unsigned DestAlign,
1334  Optional<uint32_t> AtomicCpySize = None) const;
1335 
1336  /// \returns True if the two functions have compatible attributes for inlining
1337  /// purposes.
1338  bool areInlineCompatible(const Function *Caller,
1339  const Function *Callee) const;
1340 
1341  /// \returns True if the caller and callee agree on how \p Types will be
1342  /// passed to or returned from the callee.
1343  /// to the callee.
1344  /// \param Types List of types to check.
1345  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1346  const ArrayRef<Type *> &Types) const;
1347 
1348  /// The type of load/store indexing.
1350  MIM_Unindexed, ///< No indexing.
1351  MIM_PreInc, ///< Pre-incrementing.
1352  MIM_PreDec, ///< Pre-decrementing.
1353  MIM_PostInc, ///< Post-incrementing.
1354  MIM_PostDec ///< Post-decrementing.
1355  };
1356 
1357  /// \returns True if the specified indexed load for the given type is legal.
1358  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1359 
1360  /// \returns True if the specified indexed store for the given type is legal.
1361  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1362 
1363  /// \returns The bitwidth of the largest vector type that should be used to
1364  /// load/store in the given address space.
1365  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1366 
1367  /// \returns True if the load instruction is legal to vectorize.
1368  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1369 
1370  /// \returns True if the store instruction is legal to vectorize.
1371  bool isLegalToVectorizeStore(StoreInst *SI) const;
1372 
1373  /// \returns True if it is legal to vectorize the given load chain.
1374  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1375  unsigned AddrSpace) const;
1376 
1377  /// \returns True if it is legal to vectorize the given store chain.
1378  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1379  unsigned AddrSpace) const;
1380 
1381  /// \returns True if it is legal to vectorize the given reduction kind.
1383  ElementCount VF) const;
1384 
1385  /// \returns True if the given type is supported for scalable vectors
1386  bool isElementTypeLegalForScalableVector(Type *Ty) const;
1387 
1388  /// \returns The new vector factor value if the target doesn't support \p
1389  /// SizeInBytes loads or has a better vector factor.
1390  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1391  unsigned ChainSizeInBytes,
1392  VectorType *VecTy) const;
1393 
1394  /// \returns The new vector factor value if the target doesn't support \p
1395  /// SizeInBytes stores or has a better vector factor.
1396  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1397  unsigned ChainSizeInBytes,
1398  VectorType *VecTy) const;
1399 
1400  /// Flags describing the kind of vector reduction.
1402  ReductionFlags() = default;
1403  bool IsMaxOp =
1404  false; ///< If the op a min/max kind, true if it's a max operation.
1405  bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1406  bool NoNaN =
1407  false; ///< If op is an fp min/max, whether NaNs may be present.
1408  };
1409 
1410  /// \returns True if the target prefers reductions in loop.
1411  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1412  ReductionFlags Flags) const;
1413 
1414  /// \returns True if the target prefers reductions select kept in the loop
1415  /// when tail folding. i.e.
1416  /// loop:
1417  /// p = phi (0, s)
1418  /// a = add (p, x)
1419  /// s = select (mask, a, p)
1420  /// vecreduce.add(s)
1421  ///
1422  /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1423  /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1424  /// by the target, this can lead to cleaner code generation.
1425  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1426  ReductionFlags Flags) const;
1427 
1428  /// \returns True if the target wants to expand the given reduction intrinsic
1429  /// into a shuffle sequence.
1430  bool shouldExpandReduction(const IntrinsicInst *II) const;
1431 
1432  /// \returns the size cost of rematerializing a GlobalValue address relative
1433  /// to a stack reload.
1434  unsigned getGISelRematGlobalCost() const;
1435 
1436  /// \returns True if the target supports scalable vectors.
1437  bool supportsScalableVectors() const;
1438 
1439  /// \return true when scalable vectorization is preferred.
1440  bool enableScalableVectorization() const;
1441 
1442  /// \name Vector Predication Information
1443  /// @{
1444  /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1445  /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1446  /// Reference - "Vector Predication Intrinsics").
1447  /// Use of %evl is discouraged when that is not the case.
1448  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1449  Align Alignment) const;
1450 
1453  // keep the predicating parameter
1454  Legal = 0,
1455  // where legal, discard the predicate parameter
1456  Discard = 1,
1457  // transform into something else that is also predicating
1459  };
1460 
1461  // How to transform the EVL parameter.
1462  // Legal: keep the EVL parameter as it is.
1463  // Discard: Ignore the EVL parameter where it is safe to do so.
1464  // Convert: Fold the EVL into the mask parameter.
1466 
1467  // How to transform the operator.
1468  // Legal: The target supports this operator.
1469  // Convert: Convert this to a non-VP operation.
1470  // The 'Discard' strategy is invalid.
1472 
1473  bool shouldDoNothing() const {
1474  return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1475  }
1478  };
1479 
1480  /// \returns How the target needs this vector-predicated operation to be
1481  /// transformed.
1483  /// @}
1484 
1485  /// @}
1486 
1487 private:
1488  /// Estimate the latency of specified instruction.
1489  /// Returns 1 as the default value.
1490  InstructionCost getInstructionLatency(const Instruction *I) const;
1491 
1492  /// Returns the expected throughput cost of the instruction.
1493  /// Returns -1 if the cost is unknown.
1494  InstructionCost getInstructionThroughput(const Instruction *I) const;
1495 
1496  /// The abstract base class used to type erase specific TTI
1497  /// implementations.
1498  class Concept;
1499 
1500  /// The template model for the base class which wraps a concrete
1501  /// implementation in a type erased interface.
1502  template <typename T> class Model;
1503 
1504  std::unique_ptr<Concept> TTIImpl;
1505 };
1506 
1508 public:
1509  virtual ~Concept() = 0;
1510  virtual const DataLayout &getDataLayout() const = 0;
1511  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1514  virtual unsigned getInliningThresholdMultiplier() = 0;
1515  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1516  virtual int getInlinerVectorBonusPercent() = 0;
1517  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1518  virtual unsigned
1519  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1520  ProfileSummaryInfo *PSI,
1521  BlockFrequencyInfo *BFI) = 0;
1522  virtual InstructionCost getUserCost(const User *U,
1524  TargetCostKind CostKind) = 0;
1526  virtual bool hasBranchDivergence() = 0;
1527  virtual bool useGPUDivergenceAnalysis() = 0;
1528  virtual bool isSourceOfDivergence(const Value *V) = 0;
1529  virtual bool isAlwaysUniform(const Value *V) = 0;
1530  virtual unsigned getFlatAddressSpace() = 0;
1531  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1532  Intrinsic::ID IID) const = 0;
1533  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1534  virtual bool
1535  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1536  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1537  virtual std::pair<const Value *, unsigned>
1538  getPredicatedAddrSpace(const Value *V) const = 0;
1540  Value *OldV,
1541  Value *NewV) const = 0;
1542  virtual bool isLoweredToCall(const Function *F) = 0;
1543  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1545  OptimizationRemarkEmitter *ORE) = 0;
1546  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1547  PeelingPreferences &PP) = 0;
1548  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1549  AssumptionCache &AC,
1550  TargetLibraryInfo *LibInfo,
1551  HardwareLoopInfo &HWLoopInfo) = 0;
1552  virtual bool
1555  DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
1556  virtual bool emitGetActiveLaneMask() = 0;
1558  IntrinsicInst &II) = 0;
1559  virtual Optional<Value *>
1561  APInt DemandedMask, KnownBits &Known,
1562  bool &KnownBitsComputed) = 0;
1564  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1565  APInt &UndefElts2, APInt &UndefElts3,
1566  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1567  SimplifyAndSetOp) = 0;
1568  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1569  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1570  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1571  int64_t BaseOffset, bool HasBaseReg,
1572  int64_t Scale, unsigned AddrSpace,
1573  Instruction *I) = 0;
1574  virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1575  const TargetTransformInfo::LSRCost &C2) = 0;
1576  virtual bool isNumRegsMajorCostOfLSR() = 0;
1577  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1578  virtual bool canMacroFuseCmp() = 0;
1579  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1580  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1581  TargetLibraryInfo *LibInfo) = 0;
1582  virtual AddressingModeKind
1583  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1584  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1585  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1586  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1587  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1588  virtual bool isLegalBroadcastLoad(Type *ElementTy,
1589  ElementCount NumElements) const = 0;
1590  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1591  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1592  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
1593  Align Alignment) = 0;
1594  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
1595  Align Alignment) = 0;
1596  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1597  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1598  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1599  unsigned Opcode1,
1600  const SmallBitVector &OpcodeMask) const = 0;
1601  virtual bool enableOrderedReductions() = 0;
1602  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1603  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1604  virtual bool prefersVectorizedAddressing() = 0;
1606  int64_t BaseOffset,
1607  bool HasBaseReg, int64_t Scale,
1608  unsigned AddrSpace) = 0;
1609  virtual bool LSRWithInstrQueries() = 0;
1610  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1611  virtual bool isProfitableToHoist(Instruction *I) = 0;
1612  virtual bool useAA() = 0;
1613  virtual bool isTypeLegal(Type *Ty) = 0;
1614  virtual unsigned getRegUsageForType(Type *Ty) = 0;
1615  virtual bool shouldBuildLookupTables() = 0;
1616  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1617  virtual bool shouldBuildRelLookupTables() = 0;
1618  virtual bool useColdCCForColdCall(Function &F) = 0;
1620  const APInt &DemandedElts,
1621  bool Insert,
1622  bool Extract) = 0;
1623  virtual InstructionCost
1625  ArrayRef<Type *> Tys) = 0;
1626  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1627  virtual bool supportsTailCalls() = 0;
1628  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1629  virtual MemCmpExpansionOptions
1630  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1631  virtual bool enableInterleavedAccessVectorization() = 0;
1632  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1633  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1635  unsigned BitWidth,
1636  unsigned AddressSpace,
1637  Align Alignment,
1638  bool *Fast) = 0;
1639  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1640  virtual bool haveFastSqrt(Type *Ty) = 0;
1641  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1642  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1643  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1644  const APInt &Imm, Type *Ty) = 0;
1645  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1646  TargetCostKind CostKind) = 0;
1647  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1648  const APInt &Imm, Type *Ty,
1650  Instruction *Inst = nullptr) = 0;
1651  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1652  const APInt &Imm, Type *Ty,
1653  TargetCostKind CostKind) = 0;
1654  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1655  virtual unsigned getRegisterClassForType(bool Vector,
1656  Type *Ty = nullptr) const = 0;
1657  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1658  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1659  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1660  virtual Optional<unsigned> getMaxVScale() const = 0;
1661  virtual Optional<unsigned> getVScaleForTuning() const = 0;
1662  virtual bool
1664  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1665  bool IsScalable) const = 0;
1666  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1667  virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1668  Type *ScalarValTy) const = 0;
1670  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1671  virtual unsigned getCacheLineSize() const = 0;
1672  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1674 
1675  /// \return How much before a load we should place the prefetch
1676  /// instruction. This is currently measured in number of
1677  /// instructions.
1678  virtual unsigned getPrefetchDistance() const = 0;
1679 
1680  /// \return Some HW prefetchers can handle accesses up to a certain
1681  /// constant stride. This is the minimum stride in bytes where it
1682  /// makes sense to start adding SW prefetches. The default is 1,
1683  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1684  /// even below the HW prefetcher limit, and the arguments provided are
1685  /// meant to serve as a basis for deciding this for a particular loop.
1686  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1687  unsigned NumStridedMemAccesses,
1688  unsigned NumPrefetches,
1689  bool HasCall) const = 0;
1690 
1691  /// \return The maximum number of iterations to prefetch ahead. If
1692  /// the required number of iterations is more than this number, no
1693  /// prefetching is performed.
1694  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1695 
1696  /// \return True if prefetching should also be done for writes.
1697  virtual bool enableWritePrefetching() const = 0;
1698 
1699  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1701  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1702  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
1703  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
1704  ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1706  ArrayRef<int> Mask, int Index,
1707  VectorType *SubTp,
1709  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1710  Type *Src, CastContextHint CCH,
1712  const Instruction *I) = 0;
1713  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1714  VectorType *VecTy,
1715  unsigned Index) = 0;
1716  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1718  const Instruction *I = nullptr) = 0;
1719  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1720  Type *CondTy,
1721  CmpInst::Predicate VecPred,
1723  const Instruction *I) = 0;
1724  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1725  unsigned Index) = 0;
1726 
1727  virtual InstructionCost
1728  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1729  const APInt &DemandedDstElts,
1731 
1732  virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
1733  Align Alignment,
1734  unsigned AddressSpace,
1736  const Instruction *I) = 0;
1737  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1738  Align Alignment,
1739  unsigned AddressSpace,
1741  const Instruction *I) = 0;
1742  virtual InstructionCost
1743  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1744  unsigned AddressSpace,
1746  virtual InstructionCost
1747  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1748  bool VariableMask, Align Alignment,
1750  const Instruction *I = nullptr) = 0;
1751 
1753  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1754  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1755  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1756  virtual InstructionCost
1757  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1760  virtual InstructionCost
1761  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1764  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1766  virtual InstructionCost
1769  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1770  ArrayRef<Type *> Tys,
1772  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1773  virtual InstructionCost
1774  getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
1775  virtual InstructionCost
1777  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1778  MemIntrinsicInfo &Info) = 0;
1779  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1781  Type *ExpectedType) = 0;
1782  virtual Type *
1784  unsigned SrcAddrSpace, unsigned DestAddrSpace,
1785  unsigned SrcAlign, unsigned DestAlign,
1786  Optional<uint32_t> AtomicElementSize) const = 0;
1787 
1788  virtual void getMemcpyLoopResidualLoweringType(
1790  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1791  unsigned SrcAlign, unsigned DestAlign,
1792  Optional<uint32_t> AtomicCpySize) const = 0;
1793  virtual bool areInlineCompatible(const Function *Caller,
1794  const Function *Callee) const = 0;
1795  virtual bool areTypesABICompatible(const Function *Caller,
1796  const Function *Callee,
1797  const ArrayRef<Type *> &Types) const = 0;
1798  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1799  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1800  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1801  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1802  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1803  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1804  Align Alignment,
1805  unsigned AddrSpace) const = 0;
1806  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1807  Align Alignment,
1808  unsigned AddrSpace) const = 0;
1809  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1810  ElementCount VF) const = 0;
1811  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1812  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1813  unsigned ChainSizeInBytes,
1814  VectorType *VecTy) const = 0;
1815  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1816  unsigned ChainSizeInBytes,
1817  VectorType *VecTy) const = 0;
1818  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1819  ReductionFlags) const = 0;
1820  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1821  ReductionFlags) const = 0;
1822  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1823  virtual unsigned getGISelRematGlobalCost() const = 0;
1824  virtual bool enableScalableVectorization() const = 0;
1825  virtual bool supportsScalableVectors() const = 0;
1826  virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1827  Align Alignment) const = 0;
1828  virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
1829  virtual VPLegalization
1830  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1831 };
1832 
1833 template <typename T>
1835  T Impl;
1836 
1837 public:
1838  Model(T Impl) : Impl(std::move(Impl)) {}
1839  ~Model() override = default;
1840 
1841  const DataLayout &getDataLayout() const override {
1842  return Impl.getDataLayout();
1843  }
1844 
1845  InstructionCost
1846  getGEPCost(Type *PointeeType, const Value *Ptr,
1847  ArrayRef<const Value *> Operands,
1849  return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
1850  }
1851  unsigned getInliningThresholdMultiplier() override {
1852  return Impl.getInliningThresholdMultiplier();
1853  }
1854  unsigned adjustInliningThreshold(const CallBase *CB) override {
1855  return Impl.adjustInliningThreshold(CB);
1856  }
1857  int getInlinerVectorBonusPercent() override {
1858  return Impl.getInlinerVectorBonusPercent();
1859  }
1860  InstructionCost getMemcpyCost(const Instruction *I) override {
1861  return Impl.getMemcpyCost(I);
1862  }
1863  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
1864  TargetCostKind CostKind) override {
1865  return Impl.getUserCost(U, Operands, CostKind);
1866  }
1867  BranchProbability getPredictableBranchThreshold() override {
1868  return Impl.getPredictableBranchThreshold();
1869  }
1870  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1871  bool useGPUDivergenceAnalysis() override {
1872  return Impl.useGPUDivergenceAnalysis();
1873  }
1874  bool isSourceOfDivergence(const Value *V) override {
1875  return Impl.isSourceOfDivergence(V);
1876  }
1877 
1878  bool isAlwaysUniform(const Value *V) override {
1879  return Impl.isAlwaysUniform(V);
1880  }
1881 
1882  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1883 
1884  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1885  Intrinsic::ID IID) const override {
1886  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1887  }
1888 
1889  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1890  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1891  }
1892 
1893  bool
1894  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
1895  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
1896  }
1897 
1898  unsigned getAssumedAddrSpace(const Value *V) const override {
1899  return Impl.getAssumedAddrSpace(V);
1900  }
1901 
1902  std::pair<const Value *, unsigned>
1903  getPredicatedAddrSpace(const Value *V) const override {
1904  return Impl.getPredicatedAddrSpace(V);
1905  }
1906 
1907  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1908  Value *NewV) const override {
1909  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1910  }
1911 
1912  bool isLoweredToCall(const Function *F) override {
1913  return Impl.isLoweredToCall(F);
1914  }
1915  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1916  UnrollingPreferences &UP,
1917  OptimizationRemarkEmitter *ORE) override {
1918  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
1919  }
1920  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1921  PeelingPreferences &PP) override {
1922  return Impl.getPeelingPreferences(L, SE, PP);
1923  }
1924  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1925  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1926  HardwareLoopInfo &HWLoopInfo) override {
1927  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1928  }
1929  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1930  AssumptionCache &AC, TargetLibraryInfo *TLI,
1931  DominatorTree *DT,
1932  const LoopAccessInfo *LAI) override {
1933  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
1934  }
1935  bool emitGetActiveLaneMask() override {
1936  return Impl.emitGetActiveLaneMask();
1937  }
1938  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1939  IntrinsicInst &II) override {
1940  return Impl.instCombineIntrinsic(IC, II);
1941  }
1942  Optional<Value *>
1943  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1944  APInt DemandedMask, KnownBits &Known,
1945  bool &KnownBitsComputed) override {
1946  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
1947  KnownBitsComputed);
1948  }
1949  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1950  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1951  APInt &UndefElts2, APInt &UndefElts3,
1952  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1953  SimplifyAndSetOp) override {
1954  return Impl.simplifyDemandedVectorEltsIntrinsic(
1955  IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
1956  SimplifyAndSetOp);
1957  }
1958  bool isLegalAddImmediate(int64_t Imm) override {
1959  return Impl.isLegalAddImmediate(Imm);
1960  }
1961  bool isLegalICmpImmediate(int64_t Imm) override {
1962  return Impl.isLegalICmpImmediate(Imm);
1963  }
1964  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1965  bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
1966  Instruction *I) override {
1967  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1968  AddrSpace, I);
1969  }
1970  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1971  const TargetTransformInfo::LSRCost &C2) override {
1972  return Impl.isLSRCostLess(C1, C2);
1973  }
1974  bool isNumRegsMajorCostOfLSR() override {
1975  return Impl.isNumRegsMajorCostOfLSR();
1976  }
1977  bool isProfitableLSRChainElement(Instruction *I) override {
1978  return Impl.isProfitableLSRChainElement(I);
1979  }
1980  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
1981  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
1982  DominatorTree *DT, AssumptionCache *AC,
1983  TargetLibraryInfo *LibInfo) override {
1984  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1985  }
1987  getPreferredAddressingMode(const Loop *L,
1988  ScalarEvolution *SE) const override {
1989  return Impl.getPreferredAddressingMode(L, SE);
1990  }
1991  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
1992  return Impl.isLegalMaskedStore(DataType, Alignment);
1993  }
1994  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
1995  return Impl.isLegalMaskedLoad(DataType, Alignment);
1996  }
1997  bool isLegalNTStore(Type *DataType, Align Alignment) override {
1998  return Impl.isLegalNTStore(DataType, Alignment);
1999  }
2000  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2001  return Impl.isLegalNTLoad(DataType, Alignment);
2002  }
2003  bool isLegalBroadcastLoad(Type *ElementTy,
2004  ElementCount NumElements) const override {
2005  return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2006  }
2007  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2008  return Impl.isLegalMaskedScatter(DataType, Alignment);
2009  }
2010  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2011  return Impl.isLegalMaskedGather(DataType, Alignment);
2012  }
2013  bool forceScalarizeMaskedGather(VectorType *DataType,
2014  Align Alignment) override {
2015  return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2016  }
2017  bool forceScalarizeMaskedScatter(VectorType *DataType,
2018  Align Alignment) override {
2019  return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2020  }
2021  bool isLegalMaskedCompressStore(Type *DataType) override {
2022  return Impl.isLegalMaskedCompressStore(DataType);
2023  }
2024  bool isLegalMaskedExpandLoad(Type *DataType) override {
2025  return Impl.isLegalMaskedExpandLoad(DataType);
2026  }
2027  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2028  const SmallBitVector &OpcodeMask) const override {
2029  return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2030  }
2031  bool enableOrderedReductions() override {
2032  return Impl.enableOrderedReductions();
2033  }
2034  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2035  return Impl.hasDivRemOp(DataType, IsSigned);
2036  }
2037  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2038  return Impl.hasVolatileVariant(I, AddrSpace);
2039  }
2040  bool prefersVectorizedAddressing() override {
2041  return Impl.prefersVectorizedAddressing();
2042  }
2043  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2044  int64_t BaseOffset, bool HasBaseReg,
2045  int64_t Scale,
2046  unsigned AddrSpace) override {
2047  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2048  AddrSpace);
2049  }
2050  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2051  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2052  return Impl.isTruncateFree(Ty1, Ty2);
2053  }
2054  bool isProfitableToHoist(Instruction *I) override {
2055  return Impl.isProfitableToHoist(I);
2056  }
2057  bool useAA() override { return Impl.useAA(); }
2058  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2059  unsigned getRegUsageForType(Type *Ty) override {
2060  return Impl.getRegUsageForType(Ty);
2061  }
2062  bool shouldBuildLookupTables() override {
2063  return Impl.shouldBuildLookupTables();
2064  }
2065  bool shouldBuildLookupTablesForConstant(Constant *C) override {
2066  return Impl.shouldBuildLookupTablesForConstant(C);
2067  }
2068  bool shouldBuildRelLookupTables() override {
2069  return Impl.shouldBuildRelLookupTables();
2070  }
2071  bool useColdCCForColdCall(Function &F) override {
2072  return Impl.useColdCCForColdCall(F);
2073  }
2074 
2075  InstructionCost getScalarizationOverhead(VectorType *Ty,
2076  const APInt &DemandedElts,
2077  bool Insert, bool Extract) override {
2078  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
2079  }
2080  InstructionCost
2081  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2082  ArrayRef<Type *> Tys) override {
2083  return Impl.getOperandsScalarizationOverhead(Args, Tys);
2084  }
2085 
2086  bool supportsEfficientVectorElementLoadStore() override {
2087  return Impl.supportsEfficientVectorElementLoadStore();
2088  }
2089 
2090  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2091 
2092  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2093  return Impl.enableAggressiveInterleaving(LoopHasReductions);
2094  }
2095  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2096  bool IsZeroCmp) const override {
2097  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2098  }
2099  bool enableInterleavedAccessVectorization() override {
2100  return Impl.enableInterleavedAccessVectorization();
2101  }
2103  return Impl.enableMaskedInterleavedAccessVectorization();
2104  }
2105  bool isFPVectorizationPotentiallyUnsafe() override {
2106  return Impl.isFPVectorizationPotentiallyUnsafe();
2107  }
2108  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2109  unsigned AddressSpace, Align Alignment,
2110  bool *Fast) override {
2111  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2112  Alignment, Fast);
2113  }
2114  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2115  return Impl.getPopcntSupport(IntTyWidthInBit);
2116  }
2117  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2118 
2119  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2120  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2121  }
2122 
2123  InstructionCost getFPOpCost(Type *Ty) override {
2124  return Impl.getFPOpCost(Ty);
2125  }
2126 
2127  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2128  const APInt &Imm, Type *Ty) override {
2129  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2130  }
2131  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2132  TargetCostKind CostKind) override {
2133  return Impl.getIntImmCost(Imm, Ty, CostKind);
2134  }
2135  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2136  const APInt &Imm, Type *Ty,
2138  Instruction *Inst = nullptr) override {
2139  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2140  }
2141  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2142  const APInt &Imm, Type *Ty,
2143  TargetCostKind CostKind) override {
2144  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2145  }
2146  unsigned getNumberOfRegisters(unsigned ClassID) const override {
2147  return Impl.getNumberOfRegisters(ClassID);
2148  }
2149  unsigned getRegisterClassForType(bool Vector,
2150  Type *Ty = nullptr) const override {
2151  return Impl.getRegisterClassForType(Vector, Ty);
2152  }
2153  const char *getRegisterClassName(unsigned ClassID) const override {
2154  return Impl.getRegisterClassName(ClassID);
2155  }
2156  TypeSize getRegisterBitWidth(RegisterKind K) const override {
2157  return Impl.getRegisterBitWidth(K);
2158  }
2159  unsigned getMinVectorRegisterBitWidth() const override {
2160  return Impl.getMinVectorRegisterBitWidth();
2161  }
2162  Optional<unsigned> getMaxVScale() const override {
2163  return Impl.getMaxVScale();
2164  }
2165  Optional<unsigned> getVScaleForTuning() const override {
2166  return Impl.getVScaleForTuning();
2167  }
2169  TargetTransformInfo::RegisterKind K) const override {
2170  return Impl.shouldMaximizeVectorBandwidth(K);
2171  }
2172  ElementCount getMinimumVF(unsigned ElemWidth,
2173  bool IsScalable) const override {
2174  return Impl.getMinimumVF(ElemWidth, IsScalable);
2175  }
2176  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2177  return Impl.getMaximumVF(ElemWidth, Opcode);
2178  }
2179  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2180  Type *ScalarValTy) const override {
2181  return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2182  }
2184  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2185  return Impl.shouldConsiderAddressTypePromotion(
2186  I, AllowPromotionWithoutCommonHeader);
2187  }
2188  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2189  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
2190  return Impl.getCacheSize(Level);
2191  }
2192  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
2193  return Impl.getCacheAssociativity(Level);
2194  }
2195 
2196  /// Return the preferred prefetch distance in terms of instructions.
2197  ///
2198  unsigned getPrefetchDistance() const override {
2199  return Impl.getPrefetchDistance();
2200  }
2201 
2202  /// Return the minimum stride necessary to trigger software
2203  /// prefetching.
2204  ///
2205  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2206  unsigned NumStridedMemAccesses,
2207  unsigned NumPrefetches,
2208  bool HasCall) const override {
2209  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2210  NumPrefetches, HasCall);
2211  }
2212 
2213  /// Return the maximum prefetch distance in terms of loop
2214  /// iterations.
2215  ///
2216  unsigned getMaxPrefetchIterationsAhead() const override {
2217  return Impl.getMaxPrefetchIterationsAhead();
2218  }
2219 
2220  /// \return True if prefetching should also be done for writes.
2221  bool enableWritePrefetching() const override {
2222  return Impl.enableWritePrefetching();
2223  }
2224 
2225  unsigned getMaxInterleaveFactor(unsigned VF) override {
2226  return Impl.getMaxInterleaveFactor(VF);
2227  }
2228  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2229  unsigned &JTSize,
2230  ProfileSummaryInfo *PSI,
2231  BlockFrequencyInfo *BFI) override {
2232  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2233  }
2234  InstructionCost getArithmeticInstrCost(
2235  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2236  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
2237  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
2238  ArrayRef<const Value *> Args,
2239  const Instruction *CxtI = nullptr) override {
2240  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2241  Opd1PropInfo, Opd2PropInfo, Args, CxtI);
2242  }
2243  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2244  ArrayRef<int> Mask, int Index,
2245  VectorType *SubTp,
2246  ArrayRef<const Value *> Args) override {
2247  return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
2248  }
2249  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2250  CastContextHint CCH,
2252  const Instruction *I) override {
2253  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2254  }
2255  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2256  VectorType *VecTy,
2257  unsigned Index) override {
2258  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2259  }
2260  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2261  const Instruction *I = nullptr) override {
2262  return Impl.getCFInstrCost(Opcode, CostKind, I);
2263  }
2264  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2265  CmpInst::Predicate VecPred,
2267  const Instruction *I) override {
2268  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2269  }
2270  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2271  unsigned Index) override {
2272  return Impl.getVectorInstrCost(Opcode, Val, Index);
2273  }
2274  InstructionCost
2275  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2276  const APInt &DemandedDstElts,
2277  TTI::TargetCostKind CostKind) override {
2278  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2279  DemandedDstElts, CostKind);
2280  }
2281  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2282  unsigned AddressSpace,
2284  const Instruction *I) override {
2285  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2286  CostKind, I);
2287  }
2288  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2289  unsigned AddressSpace,
2291  const Instruction *I) override {
2292  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2293  CostKind, I);
2294  }
2295  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2296  Align Alignment, unsigned AddressSpace,
2297  TTI::TargetCostKind CostKind) override {
2298  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2299  CostKind);
2300  }
2301  InstructionCost
2302  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2303  bool VariableMask, Align Alignment,
2305  const Instruction *I = nullptr) override {
2306  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2307  Alignment, CostKind, I);
2308  }
2309  InstructionCost getInterleavedMemoryOpCost(
2310  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2311  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2312  bool UseMaskForCond, bool UseMaskForGaps) override {
2313  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2314  Alignment, AddressSpace, CostKind,
2315  UseMaskForCond, UseMaskForGaps);
2316  }
2317  InstructionCost
2318  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2319  Optional<FastMathFlags> FMF,
2320  TTI::TargetCostKind CostKind) override {
2321  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2322  }
2323  InstructionCost
2324  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2325  TTI::TargetCostKind CostKind) override {
2326  return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2327  }
2328  InstructionCost getExtendedAddReductionCost(
2329  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2331  return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
2332  CostKind);
2333  }
2334  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2335  TTI::TargetCostKind CostKind) override {
2336  return Impl.getIntrinsicInstrCost(ICA, CostKind);
2337  }
2338  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2339  ArrayRef<Type *> Tys,
2340  TTI::TargetCostKind CostKind) override {
2341  return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2342  }
2343  unsigned getNumberOfParts(Type *Tp) override {
2344  return Impl.getNumberOfParts(Tp);
2345  }
2346  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2347  const SCEV *Ptr) override {
2348  return Impl.getAddressComputationCost(Ty, SE, Ptr);
2349  }
2350  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2351  return Impl.getCostOfKeepingLiveOverCall(Tys);
2352  }
2353  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2354  MemIntrinsicInfo &Info) override {
2355  return Impl.getTgtMemIntrinsic(Inst, Info);
2356  }
2357  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2358  return Impl.getAtomicMemIntrinsicMaxElementSize();
2359  }
2360  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2361  Type *ExpectedType) override {
2362  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2363  }
2365  LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2366  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2367  Optional<uint32_t> AtomicElementSize) const override {
2368  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2369  DestAddrSpace, SrcAlign, DestAlign,
2370  AtomicElementSize);
2371  }
2373  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2374  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2375  unsigned SrcAlign, unsigned DestAlign,
2376  Optional<uint32_t> AtomicCpySize) const override {
2377  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2378  SrcAddrSpace, DestAddrSpace,
2379  SrcAlign, DestAlign, AtomicCpySize);
2380  }
2381  bool areInlineCompatible(const Function *Caller,
2382  const Function *Callee) const override {
2383  return Impl.areInlineCompatible(Caller, Callee);
2384  }
2385  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2386  const ArrayRef<Type *> &Types) const override {
2387  return Impl.areTypesABICompatible(Caller, Callee, Types);
2388  }
2389  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2390  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2391  }
2392  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2393  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2394  }
2395  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2396  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2397  }
2398  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2399  return Impl.isLegalToVectorizeLoad(LI);
2400  }
2401  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2402  return Impl.isLegalToVectorizeStore(SI);
2403  }
2404  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2405  unsigned AddrSpace) const override {
2406  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2407  AddrSpace);
2408  }
2409  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2410  unsigned AddrSpace) const override {
2411  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2412  AddrSpace);
2413  }
2414  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2415  ElementCount VF) const override {
2416  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2417  }
2418  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2419  return Impl.isElementTypeLegalForScalableVector(Ty);
2420  }
2421  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2422  unsigned ChainSizeInBytes,
2423  VectorType *VecTy) const override {
2424  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2425  }
2426  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2427  unsigned ChainSizeInBytes,
2428  VectorType *VecTy) const override {
2429  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2430  }
2431  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2432  ReductionFlags Flags) const override {
2433  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2434  }
2435  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2436  ReductionFlags Flags) const override {
2437  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2438  }
2439  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2440  return Impl.shouldExpandReduction(II);
2441  }
2442 
2443  unsigned getGISelRematGlobalCost() const override {
2444  return Impl.getGISelRematGlobalCost();
2445  }
2446 
2447  bool supportsScalableVectors() const override {
2448  return Impl.supportsScalableVectors();
2449  }
2450 
2451  bool enableScalableVectorization() const override {
2452  return Impl.enableScalableVectorization();
2453  }
2454 
2455  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2456  Align Alignment) const override {
2457  return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2458  }
2459 
2460  InstructionCost getInstructionLatency(const Instruction *I) override {
2461  return Impl.getInstructionLatency(I);
2462  }
2463 
2465  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2466  return Impl.getVPLegalizationStrategy(PI);
2467  }
2468 };
2469 
2470 template <typename T>
2472  : TTIImpl(new Model<T>(Impl)) {}
2473 
2474 /// Analysis pass providing the \c TargetTransformInfo.
2475 ///
2476 /// The core idea of the TargetIRAnalysis is to expose an interface through
2477 /// which LLVM targets can analyze and provide information about the middle
2478 /// end's target-independent IR. This supports use cases such as target-aware
2479 /// cost modeling of IR constructs.
2480 ///
2481 /// This is a function analysis because much of the cost modeling for targets
2482 /// is done in a subtarget specific way and LLVM supports compiling different
2483 /// functions targeting different subtargets in order to support runtime
2484 /// dispatch according to the observed subtarget.
2485 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2486 public:
2488 
2489  /// Default construct a target IR analysis.
2490  ///
2491  /// This will use the module's datalayout to construct a baseline
2492  /// conservative TTI result.
2493  TargetIRAnalysis();
2494 
2495  /// Construct an IR analysis pass around a target-provide callback.
2496  ///
2497  /// The callback will be called with a particular function for which the TTI
2498  /// is needed and must return a TTI object for that function.
2499  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2500 
2501  // Value semantics. We spell out the constructors for MSVC.
2503  : TTICallback(Arg.TTICallback) {}
2505  : TTICallback(std::move(Arg.TTICallback)) {}
2507  TTICallback = RHS.TTICallback;
2508  return *this;
2509  }
2511  TTICallback = std::move(RHS.TTICallback);
2512  return *this;
2513  }
2514 
2516 
2517 private:
2519  static AnalysisKey Key;
2520 
2521  /// The callback used to produce a result.
2522  ///
2523  /// We use a completely opaque callback so that targets can provide whatever
2524  /// mechanism they desire for constructing the TTI for a given function.
2525  ///
2526  /// FIXME: Should we really use std::function? It's relatively inefficient.
2527  /// It might be possible to arrange for even stateful callbacks to outlive
2528  /// the analysis and thus use a function_ref which would be lighter weight.
2529  /// This may also be less error prone as the callback is likely to reference
2530  /// the external TargetMachine, and that reference needs to never dangle.
2531  std::function<Result(const Function &)> TTICallback;
2532 
2533  /// Helper function used as the callback in the default constructor.
2534  static Result getDefaultTTI(const Function &F);
2535 };
2536 
2537 /// Wrapper pass for TargetTransformInfo.
2538 ///
2539 /// This pass can be constructed from a TTI object which it stores internally
2540 /// and is queried by passes.
2542  TargetIRAnalysis TIRA;
2544 
2545  virtual void anchor();
2546 
2547 public:
2548  static char ID;
2549 
2550  /// We must provide a default constructor for the pass but it should
2551  /// never be used.
2552  ///
2553  /// Use the constructor below or call one of the creation routines.
2555 
2557 
2559 };
2560 
2561 /// Create an analysis pass wrapper around a TTI object.
2562 ///
2563 /// This analysis pass just holds the TTI instance and makes it available to
2564 /// clients.
2566 
2567 } // namespace llvm
2568 
2569 #endif
llvm::TargetTransformInfo::ReductionFlags::IsMaxOp
bool IsMaxOp
If the op a min/max kind, true if it's a max operation.
Definition: TargetTransformInfo.h:1403
llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:288
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::Concept::getExtractWithExtendCost
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
llvm::TargetTransformInfo::CacheLevel::L1D
@ L1D
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:595
llvm::TargetTransformInfo::Concept::getPopcntSupport
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
llvm::TargetTransformInfo::Concept::getGEPCost
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:481
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:460
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:888
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2485
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:488
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:421
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:456
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::TargetTransformInfo::ReductionFlags
Flags describing the kind of vector reduction.
Definition: TargetTransformInfo.h:1401
FMF.h
llvm::TargetTransformInfo::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:306
llvm::TargetTransformInfo::Concept::isHardwareLoopProfitable
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
llvm::TargetTransformInfo::Concept::isSourceOfDivergence
virtual bool isSourceOfDivergence(const Value *V)=0
llvm::TargetTransformInfo::Concept::enableMaskedInterleavedAccessVectorization
virtual bool enableMaskedInterleavedAccessVectorization()=0
llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition: TargetTransformInfo.h:75
llvm::TargetTransformInfo::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
Definition: TargetTransformInfo.cpp:294
llvm::TargetTransformInfo::Concept::rewriteIntrinsicWithAddressSpace
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:633
llvm::TargetTransformInfo::Concept::enableOrderedReductions
virtual bool enableOrderedReductions()=0
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:103
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::TargetTransformInfo::Concept::areTypesABICompatible
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1349
llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition: TargetTransformInfo.h:213
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::TargetTransformInfo::getVScaleForTuning
Optional< unsigned > getVScaleForTuning() const
Definition: TargetTransformInfo.cpp:641
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:472
llvm::ImmutablePass
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:279
llvm::TargetTransformInfo::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:624
AtomicOrdering.h
llvm::ElementCount
Definition: TypeSize.h:404
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:906
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:852
llvm::TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
Definition: TargetTransformInfo.cpp:1212
llvm::TargetTransformInfo::Concept::enableMemCmpExpansion
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition: TargetTransformInfo.cpp:369
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::TargetTransformInfo::Concept::isLegalMaskedScatter
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
Pass.h
llvm::TargetTransformInfo::getRegisterBitWidth
TypeSize getRegisterBitWidth(RegisterKind K) const
Definition: TargetTransformInfo.cpp:628
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:595
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:880
llvm::TargetTransformInfo::Concept::getIntImmCost
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getVPLegalizationStrategy
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition: TargetTransformInfo.cpp:1097
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition: TargetTransformInfo.h:644
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1225
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:942
InstCombiner
Machine InstCombiner
Definition: MachineCombiner.cpp:135
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:544
llvm::TargetTransformInfo::Concept::hasVolatileVariant
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
llvm::TargetTransformInfo::Concept::isFPVectorizationPotentiallyUnsafe
virtual bool isFPVectorizationPotentiallyUnsafe()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedExpandLoad
virtual bool isLegalMaskedExpandLoad(Type *DataType)=0
llvm::TargetTransformInfo::Concept::isAlwaysUniform
virtual bool isAlwaysUniform(const Value *V)=0
llvm::TargetTransformInfo::Concept::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const =0
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:810
llvm::TargetTransformInfo::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetTransformInfo.cpp:461
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:150
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:921
llvm::TargetTransformInfo::Concept::getRegisterClassForType
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:933
llvm::TargetTransformInfo::Concept::enableInterleavedAccessVectorization
virtual bool enableInterleavedAccessVectorization()=0
llvm::TargetTransformInfo::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:270
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:149
llvm::TargetTransformInfo::Concept::useGPUDivergenceAnalysis
virtual bool useGPUDivergenceAnalysis()=0
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetTransformInfo.cpp:343
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:507
llvm::TargetTransformInfo::Concept::getMinMaxReductionCost
virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalBroadcastLoad
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
\Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
Definition: TargetTransformInfo.cpp:405
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Definition: TargetTransformInfo.h:2506
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Definition: TargetTransformInfo.h:2504
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::TargetTransformInfo::Concept::preferPredicateOverEpilogue
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)=0
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:100
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:500
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:465
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::TargetTransformInfo::VPLegalization
Definition: TargetTransformInfo.h:1451
llvm::TargetTransformInfo::shouldBuildLookupTables
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
Definition: TargetTransformInfo.cpp:492
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:423
llvm::TargetTransformInfo::Concept::isLegalToVectorizeReduction
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:103
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:484
llvm::TargetTransformInfo::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: TargetTransformInfo.cpp:655
llvm::TargetTransformInfo::Concept::getEstimatedNumberOfCaseClusters
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
Definition: TargetTransformInfo.cpp:505
llvm::TargetTransformInfo::VPLegalization::Convert
@ Convert
Definition: TargetTransformInfo.h:1458
llvm::TargetTransformInfo::Concept::getGatherScatterOpCost
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:538
llvm::TargetTransformInfo::operator=
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
Definition: TargetTransformInfo.cpp:190
llvm::TargetTransformInfo::Concept::getPeelingPreferences
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:98
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:401
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:396
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:476
llvm::TargetTransformInfo::Concept::getMemcpyLoopLoweringType
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicElementSize) const =0
llvm::Optional
Definition: APInt.h:33
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:843
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:912
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
Definition: TargetTransformInfo.cpp:231
llvm::TargetTransformInfo::getIntImmCodeSizeCost
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
Definition: TargetTransformInfo.cpp:579
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:204
llvm::TargetTransformInfo::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:606
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
VectorType
Definition: ItaniumDemangle.h:1065
llvm::TargetTransformInfo::Concept::isTruncateFree
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
llvm::TargetTransformInfo::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
Definition: TargetTransformInfo.cpp:596
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
llvm::TargetTransformInfo::Concept::getAtomicMemIntrinsicMaxElementSize
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
llvm::TargetTransformInfo::Concept::emitGetActiveLaneMask
virtual bool emitGetActiveLaneMask()=0
llvm::TargetTransformInfo::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
Definition: TargetTransformInfo.cpp:539
llvm::TargetTransformInfo::Concept::prefersVectorizedAddressing
virtual bool prefersVectorizedAddressing()=0
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:896
llvm::TargetTransformInfo::Concept::getOrCreateResultFromMemIntrinsic
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
llvm::TargetTransformInfo::Concept::getCostOfKeepingLiveOverCall
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys) const
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: TargetTransformInfo.cpp:516
llvm::TargetTransformInfo::Concept::getRegisterBitWidth
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:493
llvm::TargetTransformInfo::preferInLoopReduction
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1086
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:989
llvm::TargetTransformInfo::Concept
Definition: TargetTransformInfo.h:1507
llvm::TargetTransformInfo::Concept::isLegalNTStore
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
new
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj * new
Definition: README.txt:125
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:420
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:152
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:886
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::TargetTransformInfo::Concept::getAddressComputationCost
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
llvm::TargetTransformInfo::Concept::getIntImmCodeSizeCost
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
llvm::TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
Definition: TargetTransformInfo.cpp:265
llvm::TargetTransformInfo::Concept::isLegalNTLoad
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:160
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1221
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::HardwareLoopInfo::IsNestingLegal
bool IsNestingLegal
Definition: TargetTransformInfo.h:105
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:422
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:151
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:328
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition: SmallBitVector.h:35
llvm::TargetTransformInfo::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
Definition: TargetTransformInfo.cpp:421
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:426
llvm::TargetTransformInfo::Concept::isLegalMaskedLoad
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::Concept::canMacroFuseCmp
virtual bool canMacroFuseCmp()=0
llvm::TargetTransformInfo::Concept::isTypeLegal
virtual bool isTypeLegal(Type *Ty)=0
llvm::TargetTransformInfo::getGISelRematGlobalCost
unsigned getGISelRematGlobalCost() const
Definition: TargetTransformInfo.cpp:1105
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:1018
llvm::TargetTransformInfo::Concept::getMinimumVF
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:484
llvm::HardwareLoopInfo::ExitCount
const SCEV * ExitCount
Definition: TargetTransformInfo.h:101
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:894
llvm::TargetTransformInfo::Concept::getCacheSize
virtual Optional< unsigned > getCacheSize(CacheLevel Level) const =0
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:551
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetTransformInfo::getMinimumVF
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
Definition: TargetTransformInfo.cpp:650
llvm::MemIntrinsicInfo::isUnordered
bool isUnordered() const
Definition: TargetTransformInfo.h:87
llvm::TargetTransformInfo::Concept::getPredictableBranchThreshold
virtual BranchProbability getPredictableBranchThreshold()=0
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfo.cpp:365
llvm::TargetTransformInfo::Concept::useAA
virtual bool useAA()=0
llvm::TargetTransformInfo::getCastContextHint
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
Definition: TargetTransformInfo.cpp:781
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:994
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:1043
llvm::MemIntrinsicInfo::Ordering
AtomicOrdering Ordering
Definition: TargetTransformInfo.h:78
llvm::TargetTransformInfo::Concept::useColdCCForColdCall
virtual bool useColdCCForColdCall(Function &F)=0
llvm::TargetTransformInfoWrapperPass::ID
static char ID
Definition: TargetTransformInfo.h:2548
llvm::TargetTransformInfo::TargetCostConstants
TargetCostConstants
Underlying constants for 'cost' values in this interface.
Definition: TargetTransformInfo.h:261
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:561
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(const TargetIRAnalysis &Arg)
Definition: TargetTransformInfo.h:2502
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:912
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:885
llvm::TargetTransformInfo::getPreferredAddressingMode
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
Definition: TargetTransformInfo.cpp:381
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1106
llvm::TargetTransformInfo::Concept::getVPLegalizationStrategy
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:239
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:496
InstrTypes.h
llvm::TargetTransformInfo::Concept::getMaxVScale
virtual Optional< unsigned > getMaxVScale() const =0
llvm::TargetTransformInfo::Concept::getPrefetchDistance
virtual unsigned getPrefetchDistance() const =0
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:235
llvm::TargetTransformInfo::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Definition: TargetTransformInfo.cpp:1063
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:589
llvm::TargetTransformInfo::Concept::supportsEfficientVectorElementLoadStore
virtual bool supportsEfficientVectorElementLoadStore()=0
llvm::TargetTransformInfo::Concept::canSaveCmp
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
llvm::TargetTransformInfo::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const
Definition: TargetTransformInfo.cpp:937
llvm::TargetTransformInfo::Concept::isFCmpOrdCheaperThanFCmpZero
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR()=0
llvm::TargetTransformInfo::supportsScalableVectors
bool supportsScalableVectors() const
Definition: TargetTransformInfo.cpp:1109
llvm::TargetTransformInfo::Concept::getExtendedAddReductionCost
virtual InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1029
llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:510
llvm::TargetTransformInfo::Concept::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoadChain
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:502
llvm::TargetTransformInfo::isLegalMaskedExpandLoad
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
Definition: TargetTransformInfo.cpp:440
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition: TargetTransformInfo.cpp:1113
llvm::TargetTransformInfo::Concept::supportsTailCalls
virtual bool supportsTailCalls()=0
llvm::TargetTransformInfo::Concept::simplifyDemandedVectorEltsIntrinsic
virtual Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
llvm::TargetTransformInfo::Concept::isLegalMaskedGather
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::hasBranchDivergence
virtual bool hasBranchDivergence()=0
llvm::Instruction
Definition: Instruction.h:42
llvm::TargetTransformInfo::Concept::enableWritePrefetching
virtual bool enableWritePrefetching() const =0
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1352
llvm::HardwareLoopInfo::PerformEntryTest
bool PerformEntryTest
Definition: TargetTransformInfo.h:109
llvm::TargetTransformInfo::Concept::getMaskedMemoryOpCost
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Definition: TargetTransformInfo.cpp:391
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:243
llvm::TargetTransformInfo::Concept::getReplicationShuffleCost
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:318
llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:686
llvm::HardwareLoopInfo::CounterInReg
bool CounterInReg
Definition: TargetTransformInfo.h:107
llvm::TargetTransformInfo::Concept::isIndexedStoreLegal
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
llvm::TargetTransformInfo::Concept::supportsScalableVectors
virtual bool supportsScalableVectors() const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoad
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStoreChain
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
Definition: TargetTransformInfo.cpp:569
llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:667
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:933
llvm::TargetTransformInfo::Concept::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast)=0
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetTransformInfo::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: TargetTransformInfo.cpp:1023
llvm::None
const NoneType None
Definition: None.h:24
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
BranchProbability.h
llvm::TargetTransformInfo::VPLegalization::VPLegalization
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
Definition: TargetTransformInfo.h:1476
llvm::TargetTransformInfo::Concept::getDataLayout
virtual const DataLayout & getDataLayout() const =0
llvm::TargetTransformInfo::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
Definition: TargetTransformInfo.cpp:452
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:595
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::TargetTransformInfo::Concept::isElementTypeLegalForScalableVector
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::TargetTransformInfo::isLegalMaskedCompressStore
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
Definition: TargetTransformInfo.cpp:436
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:565
llvm::createTargetTransformInfoWrapperPass
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Definition: TargetTransformInfo.cpp:1232
llvm::TargetTransformInfo::VPLegalization::EVLParamStrategy
VPTransform EVLParamStrategy
Definition: TargetTransformInfo.h:1465
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:887
llvm::TargetTransformInfo::Concept::collectFlatAddressOperands
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
llvm::TargetTransformInfo::VPLegalization::VPTransform
VPTransform
Definition: TargetTransformInfo.h:1452
llvm::TargetTransformInfo::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Definition: TargetTransformInfo.cpp:251
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::TargetTransformInfo::Concept::~Concept
virtual ~Concept()=0
llvm::TargetTransformInfo::Concept::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::Concept::hasActiveVectorLength
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:892
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetTransformInfo::Concept::getInstructionLatency
virtual InstructionCost getInstructionLatency(const Instruction *I)=0
llvm::TargetTransformInfo::Concept::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I)=0
llvm::TargetTransformInfo::shouldBuildLookupTablesForConstant
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
Definition: TargetTransformInfo.cpp:496
llvm::TargetTransformInfo::Concept::shouldMaximizeVectorBandwidth
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1353
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo()=delete
llvm::TargetTransformInfo::Concept::getMemcpyLoopResidualLoweringType
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicCpySize) const =0
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:417
llvm::TargetTransformInfo::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
Definition: TargetTransformInfo.cpp:255
llvm::TargetTransformInfo::VPLegalization::OpStrategy
VPTransform OpStrategy
Definition: TargetTransformInfo.h:1471
llvm::TargetTransformInfo::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
Definition: TargetTransformInfo.cpp:410
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2541
llvm::TargetTransformInfo::Concept::getInterleavedMemoryOpCost
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
llvm::TargetTransformInfo::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Query the target whether lowering of the llvm.get.active.lane.mask intrinsic is supported.
Definition: TargetTransformInfo.cpp:301
llvm::TargetTransformInfo::preferPredicatedReductionSelect
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1091
llvm::TargetTransformInfo::Concept::hasDivRemOp
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
llvm::TargetTransformInfo::Concept::isLSRCostLess
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:909
llvm::TargetTransformInfo::ReductionFlags::NoNaN
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Definition: TargetTransformInfo.h:1406
llvm::TargetTransformInfo::Concept::shouldBuildLookupTables
virtual bool shouldBuildLookupTables()=0
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:907
llvm::TargetTransformInfo::forceScalarizeMaskedGather
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
Definition: TargetTransformInfo.cpp:426
llvm::TargetIRAnalysis::Result
TargetTransformInfo Result
Definition: TargetTransformInfo.h:2487
llvm::TargetTransformInfo::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF) const
Definition: TargetTransformInfo.cpp:705
llvm::TargetTransformInfo::VPLegalization::shouldDoNothing
bool shouldDoNothing() const
Definition: TargetTransformInfo.h:1473
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:619
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::TargetTransformInfo::Concept::getMaximumVF
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::TargetTransformInfo::isLegalAltInstr
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
Definition: TargetTransformInfo.cpp:415
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:69
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:431
llvm::TargetTransformInfo::getCostOfKeepingLiveOverCall
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Definition: TargetTransformInfo.cpp:981
llvm::TargetTransformInfo::Concept::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:912
llvm::TargetTransformInfo::Concept::isLegalMaskedStore
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition: TargetTransformInfo.cpp:665
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:559
llvm::TargetTransformInfo::Concept::getScalarizationOverhead
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract)=0
llvm::TargetTransformInfo::Concept::getVPMemoryOpCost
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the two-argument getUserCost with Operands which are the curren...
Definition: TargetTransformInfo.h:326
llvm::TargetTransformInfo::Concept::getTgtMemIntrinsic
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:510
llvm::TargetTransformInfo::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfo.cpp:871
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:546
llvm::TargetTransformInfo::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
Definition: TargetTransformInfo.cpp:279
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C1.
Definition: TargetTransformInfo.cpp:356
llvm::TargetTransformInfo::Concept::shouldExpandReduction
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
llvm::TargetTransformInfo::Concept::getLoadVectorFactor
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:900
llvm::TargetTransformInfo::VPLegalization::Discard
@ Discard
Definition: TargetTransformInfo.h:1456
llvm::TargetTransformInfo::Concept::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1675
llvm::TargetTransformInfo::Concept::isLoweredToCall
virtual bool isLoweredToCall(const Function *F)=0
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
Definition: TargetTransformInfo.cpp:470
llvm::TargetTransformInfo::Concept::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:964
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1051
llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:209
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetTransformInfo.cpp:339
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::Concept::getOperandsScalarizationOverhead
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)=0
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:427
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:551
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:284
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:898
llvm::TargetTransformInfo::Concept::getVScaleForTuning
virtual Optional< unsigned > getVScaleForTuning() const =0
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:824
llvm::TargetTransformInfo::Concept::shouldBuildRelLookupTables
virtual bool shouldBuildRelLookupTables()=0
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:595
llvm::TargetTransformInfo::Concept::getRegisterClassName
virtual const char * getRegisterClassName(unsigned ClassID) const =0
llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:394
llvm::TargetTransformInfo::ReductionFlags::IsSigned
bool IsSigned
Whether the operation is a signed int reduction.
Definition: TargetTransformInfo.h:1405
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:904
llvm::TargetTransformInfo::Concept::instCombineIntrinsic
virtual Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
llvm::MemIntrinsicInfo::ReadMem
bool ReadMem
Definition: TargetTransformInfo.h:83
llvm::TargetTransformInfo::Concept::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:792
InstructionCost.h
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Definition: TargetTransformInfo.cpp:373
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: TargetTransformInfo.cpp:474
llvm::TargetTransformInfo::prefersVectorizedAddressing
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
Definition: TargetTransformInfo.cpp:457
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is.
Definition: TargetTransformInfo.h:787
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:542
llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
Definition: TargetTransformInfo.cpp:521
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:535
llvm::TargetTransformInfo::Concept::shouldConsiderAddressTypePromotion
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1102
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::TargetTransformInfo::Concept::haveFastSqrt
virtual bool haveFastSqrt(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
llvm::TargetTransformInfo::isElementTypeLegalForScalableVector
bool isElementTypeLegalForScalableVector(Type *Ty) const
Definition: TargetTransformInfo.cpp:1068
llvm::TargetTransformInfo::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
Definition: TargetTransformInfo.cpp:386
llvm::TargetTransformInfo::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:275
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:222
llvm::TargetTransformInfo::Concept::isIndexedLoadLegal
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::TargetTransformInfo::Concept::getFlatAddressSpace
virtual unsigned getFlatAddressSpace()=0
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:467
llvm::TargetTransformInfo::hasDivRemOp
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
Definition: TargetTransformInfo.cpp:448
llvm::TargetTransformInfo::Concept::LSRWithInstrQueries
virtual bool LSRWithInstrQueries()=0
llvm::TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:311
llvm::TargetTransformInfo::getCacheSize
Optional< unsigned > getCacheSize(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:677
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:615
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition: TargetTransformInfo.h:642
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:697
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1350
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:905
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1072
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:891
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:1047
llvm::TargetTransformInfo::CacheLevel::L2D
@ L2D
llvm::TargetTransformInfo::Concept::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1351
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:795
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis()
Default construct a target IR analysis.
Definition: TargetTransformInfo.cpp:1188
llvm::TargetTransformInfo::Concept::preferInLoopReduction
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args=None) const
Definition: TargetTransformInfo.cpp:771
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
llvm::TargetTransformInfo::enableOrderedReductions
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
Definition: TargetTransformInfo.cpp:444
llvm::TargetTransformInfo::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: TargetTransformInfo.cpp:637
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedGather
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::getStoreMinimumVF
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
llvm::TargetTransformInfo::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicCpySize=None) const
Definition: TargetTransformInfo.cpp:1008
llvm::TargetTransformInfo::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicElementSize=None) const
Definition: TargetTransformInfo.cpp:999
llvm::TargetTransformInfo::Concept::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::getCacheAssociativity
Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:682
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:671
llvm::TargetTransformInfo::supportsTailCalls
bool supportsTailCalls() const
If the target supports tail calls.
Definition: TargetTransformInfo.cpp:525
llvm::TargetTransformInfo::Concept::getShuffleCost
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp, ArrayRef< const Value * > Args)=0
std
Definition: BitVector.h:851
llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
Definition: TargetTransformInfo.cpp:543
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:424
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
Definition: TargetTransformInfo.h:2510
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:99
llvm::MemIntrinsicInfo::WriteMem
bool WriteMem
Definition: TargetTransformInfo.h:84
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:498
llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition: IntrinsicInst.h:391
llvm::TargetTransformInfo::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: TargetTransformInfo.cpp:260
llvm::TypeSize
Definition: TypeSize.h:435
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1039
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:490
llvm::TargetTransformInfo::Concept::enableAggressiveInterleaving
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetTransformInfo::shouldMaximizeVectorBandwidth
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
Definition: TargetTransformInfo.cpp:645
llvm::TargetTransformInfo::isFPVectorizationPotentiallyUnsafe
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
Definition: TargetTransformInfo.cpp:547
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::TargetTransformInfo::Concept::isLegalBroadcastLoad
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
PassManager.h
Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:242
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:425
llvm::TargetTransformInfo::hasActiveVectorLength
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
Definition: TargetTransformInfo.cpp:1117
llvm::TargetTransformInfo::forceScalarizeMaskedScatter
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
Definition: TargetTransformInfo.cpp:431
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1194
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:225
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo(Loop *L)
Definition: TargetTransformInfo.h:97
llvm::TargetTransformInfo::Concept::getFPOpCost
virtual InstructionCost getFPOpCost(Type *Ty)=0
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:195
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:863
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:200
llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:891
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:933
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:215
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::TargetTransformInfo::TargetTransformInfo
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
Definition: TargetTransformInfo.h:2471
llvm::TargetTransformInfo::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
Definition: TargetTransformInfo.cpp:973
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
llvm::TargetTransformInfo::Concept::isLegalAltInstr
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
llvm::TargetTransformInfo::Concept::getCallInstrCost
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getArithmeticReductionCost
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:805
llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
Definition: TargetTransformInfo.cpp:955
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:148
llvm::TargetTransformInfo::Concept::getGISelRematGlobalCost
virtual unsigned getGISelRematGlobalCost() const =0
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:933
llvm::TargetTransformInfo::Concept::getIntImmCostInst
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
llvm::TargetTransformInfo::Concept::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::invalidate
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
Definition: TargetTransformInfo.h:197
llvm::TargetTransformInfo::Concept::getInlinerVectorBonusPercent
virtual int getInlinerVectorBonusPercent()=0
llvm::TargetTransformInfo::Concept::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t Imm)=0
SmallBitVector.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:439
llvm::TargetTransformInfo::ReductionFlags::ReductionFlags
ReductionFlags()=default
llvm::TargetTransformInfo::Concept::enableScalableVectorization
virtual bool enableScalableVectorization() const =0
llvm::TargetTransformInfo::Concept::getNumberOfParts
virtual unsigned getNumberOfParts(Type *Tp)=0
llvm::TargetTransformInfo::Concept::getPredicatedAddrSpace
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition: TargetTransformInfo.h:1454
llvm::TargetTransformInfo::Concept::shouldBuildLookupTablesForConstant
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
llvm::TargetTransformInfo::Concept::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I)=0
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:690
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1034
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
Definition: TargetTransformInfo.cpp:361
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1101
llvm::TargetTransformInfo::Concept::isLegalAddressingMode
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I)=0
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:334
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1079
llvm::TargetTransformInfo::Concept::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1354
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:50
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1057
llvm::SmallVectorImpl< const Value * >
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::TargetTransformInfo::Concept::preferPredicatedReductionSelect
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:70
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:990
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:48
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::TargetTransformInfo::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
Definition: TargetTransformInfo.cpp:529
llvm::TargetTransformInfo::Concept::areInlineCompatible
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:453
llvm::TargetTransformInfo::Concept::getMinVectorRegisterBitWidth
virtual unsigned getMinVectorRegisterBitWidth() const =0
llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:929
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:710
llvm::TargetTransformInfo::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:835
llvm::MemIntrinsicInfo::MatchingId
unsigned short MatchingId
Definition: TargetTransformInfo.h:81
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3230
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:759
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::TargetTransformInfo::Concept::getUserCost
virtual InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getPreferredAddressingMode
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:156
llvm::TargetTransformInfo::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
Definition: TargetTransformInfo.cpp:478
llvm::TargetTransformInfo::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
Definition: TargetTransformInfo.cpp:501
llvm::TargetTransformInfo::Concept::getMaxInterleaveFactor
virtual unsigned getMaxInterleaveFactor(unsigned VF)=0
llvm::TargetTransformInfo::Concept::getStoreVectorFactor
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3086
llvm::TargetTransformInfo::Concept::getLoadStoreVecRegBitWidth
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
llvm::TargetTransformInfo::Concept::getRegUsageForType
virtual unsigned getRegUsageForType(Type *Ty)=0
llvm::TargetTransformInfo::~TargetTransformInfo
~TargetTransformInfo()
llvm::TargetTransformInfo::Concept::getCacheLineSize
virtual unsigned getCacheLineSize() const =0
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:241
llvm::TargetTransformInfo::Concept::canHaveNonUndefGlobalInitializerInAddressSpace
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
llvm::TargetTransformInfo::Concept::adjustInliningThreshold
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
llvm::TargetTransformInfo::Concept::getIntImmCostIntrin
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:908
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:985
llvm::TargetTransformInfo::getStoreMinimumVF
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
Definition: TargetTransformInfo.cpp:660
llvm::TargetTransformInfo::getRegUsageForType
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
Definition: TargetTransformInfo.cpp:488
llvm::TargetTransformInfo::Concept::isLegalMaskedCompressStore
virtual bool isLegalMaskedCompressStore(Type *DataType)=0
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:482
llvm::TargetTransformInfo::Concept::getInliningThresholdMultiplier
virtual unsigned getInliningThresholdMultiplier()=0
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:102
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:701
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:573
llvm::TargetTransformInfo::Concept::getMemoryOpCost
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:153
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition: TargetTransformInfo.h:645
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:893
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition: TargetTransformInfo.h:643
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStore
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
llvm::TargetTransformInfo::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:247
llvm::TargetTransformInfo::Concept::getMemcpyCost
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetTransformInfo.cpp:347
llvm::TargetTransformInfo::getMemcpyCost
InstructionCost getMemcpyCost(const Instruction *I) const
Definition: TargetTransformInfo.cpp:949
llvm::TargetTransformInfo::Concept::simplifyDemandedUseBitsIntrinsic
virtual Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:450
llvm::TargetTransformInfo::Concept::getVectorInstrCost
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)=0
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedScatter
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::getUnrollingPreferences
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
llvm::MemIntrinsicInfo::IsVolatile
bool IsVolatile
Definition: TargetTransformInfo.h:85
llvm::TargetTransformInfo::Concept::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::IntrinsicCostAttributes::IntrinsicCostAttributes
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost=InstructionCost::getInvalid())
Definition: TargetTransformInfo.cpp:60