TargetTransformInfo.h

//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
enum ID : unsigned;
}

class AssumptionCache;
class BranchInst;
class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class Loop;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};
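
// Illustrative sketch (not part of this header): how an analysis pass might
// treat a target memory intrinsic like an ordinary load by combining
// TargetTransformInfo::getTgtMemIntrinsic (declared further below) with the
// fields above. The names `TTI` and `II` are assumed to be in scope.
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal &&
//       Info.ReadMem && !Info.WriteMem && Info.isUnordered()) {
//     // Safe to reason about II as a simple, unordered load from Info.PtrVal.
//   }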

/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
  HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L) : L(L) {}
  Loop *L = nullptr;
  BasicBlock *ExitBlock = nullptr;
  BranchInst *ExitBranch = nullptr;
  const SCEV *ExitCount = nullptr;
  IntegerType *CountType = nullptr;
  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
                                  // value in every iteration.
  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                  // another hardware loop?
  bool CounterInReg = false;      // Should loop counter be updated in
                                  // the loop via a phi?
  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
                                  // icmp ne zero on the loop counter value and
                                  // produces an i1 to guard the loop entry.
  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
                               DominatorTree &DT, bool ForceNestedLoop = false,
                               bool ForceHardwareLoopPHI = false);
  bool canAnalyze(LoopInfo &LI);
};

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }
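
  // Illustrative sketch (not part of this header): querying the
  // reciprocal-throughput cost of an existing instruction. A reference to a
  // TargetTransformInfo named `TTI` and an Instruction `I` with a parent
  // basic block are assumed to be in scope.
  //
  //   int Cost =
  //       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
  //   bool Expensive = Cost >= (int)TargetTransformInfo::TCC_Expensive;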

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };
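
  // Illustrative sketch (not part of this header): summing getUserCost over a
  // basic block and comparing against a budget expressed in TCC_Basic units.
  // `TTI`, `BB`, and `Budget` are assumed names, not defined here.
  //
  //   int Total = 0;
  //   for (const Instruction &I : BB)
  //     Total += TTI.getUserCost(&I);
  //   bool FitsBudget = Total <= Budget * (int)TargetTransformInfo::TCC_Basic;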

  /// Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1,
                  const User *U = nullptr) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1,
                  const User *U = nullptr) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
                  const User *U = nullptr) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if they exceed 10%. Note that these bonuses are
  /// somewhat arbitrary and evolved over time by accident as much as because
  /// they are principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys,
                       const User *U = nullptr) const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments,
                       const User *U = nullptr) const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  int getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
  /// table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantage is that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must equal
  /// the number of current operands the IR user has, and their order on the
  /// list must be the same as the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }
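
  // Illustrative sketch (not part of this header): asking what \c U would cost
  // if one of its operands were replaced, e.g. after a speculative
  // simplification. `TTI`, `U`, and `SimplifiedOp` are assumed names.
  //
  //   SmallVector<const Value *, 4> NewOperands(U->value_op_begin(),
  //                                             U->value_op_end());
  //   NewOperands[0] = SimplifiedOp; // hypothetical replacement operand
  //   int CostIfSimplified = TTI.getUserCost(U, NewOperands);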

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  // Returns true for the target specific
  // set of operations which produce uniform result
  // even taking non-uniform arguments
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  //
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Return any intrinsic address operand indexes which may be rewritten if
  /// they use a flat address space pointer.
  ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
  /// NewV, which has a different address space. This should happen for every
  /// operand index that collectFlatAddressOperands returned for the intrinsic.
  /// \returns true if the intrinsic was handled.
  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents number of instructions optimized when "back edge"
    // becomes "fall through" in unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true the peeling cost model can decide to peel only
    /// some iterations and in this case it will set this to false.
    bool PeelProfiledIterations;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
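
  // Illustrative sketch (not part of this header): how a client such as a
  // loop-unrolling pass might consume these preferences. `TTI`, `SE`, `L`, and
  // `TripCount` are assumed names; the defaults shown are hypothetical.
  //
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   UP.Threshold = 150;   // caller-provided target-independent default
  //   UP.Partial = false;
  //   UP.Runtime = false;
  //   TTI.getUnrollingPreferences(L, SE, UP); // target adjusts the defaults
  //   if (UP.Partial && TripCount % 2 == 0)
  //     ... // consider partial unrolling by a factor bounded by UP.MaxCount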

  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. A HW
  /// support is considered as "Fast" if it can outperform, or is on a par
  /// with, the SW implementation when the population is sparse; otherwise, it
  /// is considered as "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
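
  // Illustrative sketch (not part of this header): checking whether a
  // "base register + constant offset" mode is legal for an i32 load before
  // folding the offset into the address. `TTI` and `Ctx` (an LLVMContext) are
  // assumed names.
  //
  //   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  //                                          /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/0);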

  /// Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for example
  /// hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// Return true if LSR should make efforts to generate indexed addressing
  /// modes that operate across loop iterations.
  bool shouldFavorBackedgeIndex(const Loop *L) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist instruction in the
  /// then/else to before if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;

    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;
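
  // Illustrative sketch (not part of this header): how a target implementation
  // might populate MemCmpExpansionOptions, e.g. for a target with fast
  // unaligned 8/4/2/1-byte loads. The numbers here are hypothetical.
  //
  //   TargetTransformInfo::MemCmpExpansionOptions Options;
  //   Options.MaxNumLoads = OptSize ? 4 : 8;
  //   Options.LoadSizes = {8, 4, 2, 1}; // decreasing order
  //   if (IsZeroCmp) {
  //     Options.NumLoadsPerBlock = 2;
  //     Options.AllowOverlappingLoads = true;
  //   }
  //   return Options;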

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;
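
  // Illustrative sketch (not part of this header): probing whether a 4-byte
  // aligned i64 access is allowed and whether it is also fast. `TTI` and `Ctx`
  // are assumed names.
  //
  //   bool Fast = false;
  //   bool Allowed = TTI.allowsMisalignedMemoryAccesses(
  //       Ctx, /*BitWidth=*/64, /*AddressSpace=*/0, /*Alignment=*/4, &Fast);
  //   if (Allowed && Fast)
  //     ... // emit the wide access instead of splitting it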

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// Return the expected cost for the given integer when optimising
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return the number of registers in the target-provided register class.
  unsigned getNumberOfRegisters(unsigned ClassID) const;

  /// \return the target-provided register class ID for the provided type,
  /// accounting for type promotion and other type-legalization techniques
  /// that the target might apply. However, it specifically does not account
  /// for the scalarization or splitting of vector types. Should a vector type
  /// require scalarization or splitting into multiple underlying vector
  /// registers, that type should be mapped to a register class containing no
  /// registers. Specifically, this is designed to provide a simple, high-level
  /// view of the register allocation later performed by the backend. These
  /// register classes don't necessarily map onto the register classes used by
  /// the backend.
  /// FIXME: It's not currently possible to determine how many registers
  /// are used by the provided type.
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;

  /// \return the target-provided register class name
  const char *getRegisterClassName(unsigned ClassID) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  unsigned getMinimumVF(unsigned ElemWidth) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely
    // between microarchitectures. Also, we currently do not have a use for
    // L3 cache size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How much before a load we should place the prefetch
  /// instruction. This is currently measured in number of
  /// instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain
  /// constant stride. This is the minimum stride in bytes where it
  /// makes sense to start adding SW prefetches. The default is 1,
  /// i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead. If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  unsigned getMaxPrefetchIterationsAhead() const;
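
  // Illustrative sketch (not part of this header): a software-prefetching
  // heuristic driven by the three queries above. `TTI`, `StrideInBytes`, and
  // `IterationsAhead` are assumed names.
  //
  //   if (StrideInBytes >= TTI.getMinPrefetchStride() &&
  //       IterationsAhead <= TTI.getMaxPrefetchIterationsAhead()) {
  //     unsigned Distance = TTI.getPrefetchDistance(); // in instructions
  //     ... // emit a prefetch roughly Distance instructions ahead of the load
  //   }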

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  static OperandValueKind getOperandInfo(Value *V,
                                         OperandValueProperties &OpProps);

  /// This is an approximation of reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;

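  // Illustrative sketch (not part of this header): comparing scalar and
  // vector throughput cost of an addition, as a vectorizer-style profitability
  // check. `TTI` and `Ctx` are assumed names; the factor of 4 is made up.
  //
  //   Type *ScalarTy = Type::getInt32Ty(Ctx);
  //   Type *VecTy = VectorType::get(ScalarTy, 4);
  //   int ScalarCost =
  //       4 * TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy);
  //   int VectorCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
  //   bool Profitable = VectorCost < ScalarCost;
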
  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds to show the insert/extract point and the type of
  /// the subvector being inserted/extracted.
  /// NOTE: For subvector extractions Tp represents the source type.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                         Type *CondTy = nullptr,
                         const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace,
                      const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of Gather or Scatter operation
  /// \p Opcode - is a type of memory access Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;
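
  // Illustrative sketch (not part of this header): deciding between a masked
  // gather and a plain contiguous vector load by cost. `TTI`, `VecTy`, and
  // `Ptr` are assumed names; address space 0 and 4-byte element alignment are
  // arbitrary choices for the example.
  //
  //   int GatherCost = TTI.getGatherScatterOpCost(Instruction::Load, VecTy,
  //                                               Ptr, /*VariableMask=*/true,
  //                                               /*Alignment=*/4);
  //   int ContiguousCost = TTI.getMemoryOpCost(Instruction::Load, VecTy,
  //                                            /*Alignment=*/4,
  //                                            /*AddressSpace=*/0);
  //   bool PreferGather = GatherCost <= ContiguousCost;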

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  /// \p UseMaskForCond indicates if the memory access is predicated.
  /// \p UseMaskForGaps indicates if gaps should be masked.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false) const;

  /// Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) const;
  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  /// Three cases are handled: 1. scalar instruction 2. vector instruction
  /// 3. scalar instruction which is to be vectorized with VF.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1) const;

  /// \returns The cost of Intrinsic instructions. Types analysis only.
  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
  /// arguments and the return value will be computed based on types.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX) const;

  /// \returns The cost of Call instructions.
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can be
  /// merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the scalar evolution object which
  /// is used in order to get the Ptr step value in case of constant stride.
  /// The 'Ptr' parameter holds the SCEV of the access pointer.
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
                                const SCEV *Ptr = nullptr) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may write
  /// or read to memory, volatility and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  /// \returns The maximum element size, in bytes, for an element
  /// unordered-atomic memory intrinsic.
  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given intrinsic
  /// memory operation. Returns nullptr if the target cannot create a result
  /// from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns The type to use in a loop expansion of a memcpy call.
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  /// \param RemainingBytes The number of bytes to copy.
  ///
  /// Calculates the operand types to use when copying \p RemainingBytes of
  /// memory, where source and destination alignments are \p SrcAlign and
  /// \p DestAlign respectively.
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;

  /// \returns True if the two functions have compatible attributes for inlining
  /// purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
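
  // Illustrative sketch (not part of this header): an inliner-style gate that
  // refuses to inline when target attributes are incompatible. `TTI`, `CB`
  // (a call site), and its caller/callee are assumed to be in scope.
  //
  //   const Function *Caller = CB.getCaller();
  //   const Function *Callee = CB.getCalledFunction();
  //   if (!Callee || !TTI.areInlineCompatible(Caller, Callee))
  //     return false; // never inline across incompatible target features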

  /// \returns True if the caller and callee agree on how \p Args will be passed
  /// to the callee.
  /// \param[out] Args The list of compatible arguments. The implementation may
  /// filter out any incompatible args from this list.
  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const;

  /// The type of load/store indexing.
  enum MemIndexedMode {
    MIM_Unindexed, ///< No indexing.
    MIM_PreInc,    ///< Pre-incrementing.
    MIM_PreDec,    ///< Pre-decrementing.
    MIM_PostInc,   ///< Post-incrementing.
    MIM_PostDec    ///< Post-decrementing.
  };

  /// \returns True if the specified indexed load for the given type is legal.
  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns True if the specified indexed store for the given type is legal.
  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \returns True if the load instruction is legal to vectorize.
  bool isLegalToVectorizeLoad(LoadInst *LI) const;

  /// \returns True if the store instruction is legal to vectorize.
  bool isLegalToVectorizeStore(StoreInst *SI) const;

  /// \returns True if it is legal to vectorize the given load chain.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given store chain.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
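
  // Illustrative sketch (not part of this header): a load/store-vectorizer
  // style check that a chain of N consecutive 4-byte stores both fits the
  // target's vector register width and is legal to merge. `TTI`, `N`,
  // `Alignment`, and `AddrSpace` are assumed names.
  //
  //   unsigned ChainBytes = N * 4;
  //   bool Fits = ChainBytes * 8 <= TTI.getLoadStoreVecRegBitWidth(AddrSpace);
  //   bool Legal =
  //       TTI.isLegalToVectorizeStoreChain(ChainBytes, Alignment, AddrSpace);
  //   if (Fits && Legal)
  //     ... // merge the chain into a single vector store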

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes loads or has a better vector factor.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes stores or has a better vector factor.
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;

  /// Flags describing the kind of vector reduction.
  struct ReductionFlags {
    ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
    bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
    bool IsSigned; ///< Whether the operation is a signed int reduction.
    bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
  };

  /// \returns True if the target wants to handle the given reduction idiom in
  /// the intrinsics form instead of the shuffle form.
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const;

  /// \returns True if the target wants to expand the given reduction intrinsic
  /// into a shuffle sequence.
  bool shouldExpandReduction(const IntrinsicInst *II) const;

  /// \returns the size cost of rematerializing a GlobalValue address relative
  /// to a stack reload.
  unsigned getGISelRematGlobalCost() const;

  /// @}

private:
  /// Estimate the latency of specified instruction.
  /// Returns 1 as the default value.
  int getInstructionLatency(const Instruction *I) const;

  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  int getInstructionThroughput(const Instruction *I) const;

  /// The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};

class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments, const User *U) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getInlinerVectorBonusPercent() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys, const User *U) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments,
                               const User *U) = 0;
  virtual int getMemcpyCost(const Instruction *I) = 0;
  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                    unsigned &JTSize) = 0;
  virtual int
  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const = 0;
  virtual bool rewriteIntrinsicWithAddressSpace(
      IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP) = 0;
  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                             TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool canMacroFuseCmp() = 0;
  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) = 0;
  virtual bool shouldFavorPostInc() const = 0;
  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
  virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool LSRWithInstrQueries() = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool useAA() = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool useColdCCForColdCall(Function &F) = 0;
  virtual unsigned
  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                    unsigned VF) = 0;
  virtual bool supportsEfficientVectorElementLoadStore() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                    Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
  virtual unsigned getRegisterClassForType(bool Vector,
                                           Type *Ty = nullptr) const = 0;
  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
  virtual unsigned getMinVectorRegisterBitWidth() = 0;
  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() const = 0;
  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
  virtual llvm::Optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const = 0;

  /// \return How much before a load we should place the prefetch
  /// instruction. This is currently measured in number of
1276  /// instructions.
1277  virtual unsigned getPrefetchDistance() const = 0;
1278 
1279  /// \return Some HW prefetchers can handle accesses up to a certain
1280  /// constant stride. This is the minimum stride in bytes where it
1281  /// makes sense to start adding SW prefetches. The default is 1,
1282  /// i.e. prefetch with any stride.
1283  virtual unsigned getMinPrefetchStride() const = 0;
1284 
1285  /// \return The maximum number of iterations to prefetch ahead. If
1286  /// the required number of iterations is more than this number, no
1287  /// prefetching is performed.
1288  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
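  /// An illustrative sketch (not part of this interface) of how a
  /// prefetch-insertion pass might combine the three hooks above. Stride,
  /// ItersAhead and emitPrefetch are hypothetical values/helpers owned by
  /// such a pass:
  ///
  /// \code
  ///   if (Stride >= TTI.getMinPrefetchStride() &&
  ///       ItersAhead <= TTI.getMaxPrefetchIterationsAhead())
  ///     emitPrefetch(/*InstructionsAhead=*/TTI.getPrefetchDistance());
  /// \endcode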
1289 
1290  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1291  virtual unsigned
1292  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1293  OperandValueKind Opd2Info,
1294  OperandValueProperties Opd1PropInfo,
1295  OperandValueProperties Opd2PropInfo,
1296  ArrayRef<const Value *> Args) = 0;
1297  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1298  Type *SubTp) = 0;
1299  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1300  const Instruction *I) = 0;
1301  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1302  VectorType *VecTy, unsigned Index) = 0;
1303  virtual int getCFInstrCost(unsigned Opcode) = 0;
1304  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1305  Type *CondTy, const Instruction *I) = 0;
1306  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1307  unsigned Index) = 0;
1308  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1309  unsigned AddressSpace, const Instruction *I) = 0;
1310  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1311  unsigned Alignment,
1312  unsigned AddressSpace) = 0;
1313  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1314  Value *Ptr, bool VariableMask,
1315  unsigned Alignment) = 0;
1316  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1317  unsigned Factor,
1318  ArrayRef<unsigned> Indices,
1319  unsigned Alignment,
1320  unsigned AddressSpace,
1321  bool UseMaskForCond = false,
1322  bool UseMaskForGaps = false) = 0;
1323  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1324  bool IsPairwiseForm) = 0;
1325  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1326  bool IsPairwiseForm, bool IsUnsigned) = 0;
1327  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1328  ArrayRef<Type *> Tys, FastMathFlags FMF,
1329  unsigned ScalarizationCostPassed) = 0;
1330  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1331  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1332  virtual int getCallInstrCost(Function *F, Type *RetTy,
1333  ArrayRef<Type *> Tys) = 0;
1334  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1335  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1336  const SCEV *Ptr) = 0;
1337  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1338  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1339  MemIntrinsicInfo &Info) = 0;
1340  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1341  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1342  Type *ExpectedType) = 0;
1343  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1344  unsigned SrcAlign,
1345  unsigned DestAlign) const = 0;
1346  virtual void getMemcpyLoopResidualLoweringType(
1347  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1348  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1349  virtual bool areInlineCompatible(const Function *Caller,
1350  const Function *Callee) const = 0;
1351  virtual bool
1352  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1353  SmallPtrSetImpl<Argument *> &Args) const = 0;
1354  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1355  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1356  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1357  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1358  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1359  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1360  unsigned Alignment,
1361  unsigned AddrSpace) const = 0;
1362  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1363  unsigned Alignment,
1364  unsigned AddrSpace) const = 0;
1365  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1366  unsigned ChainSizeInBytes,
1367  VectorType *VecTy) const = 0;
1368  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1369  unsigned ChainSizeInBytes,
1370  VectorType *VecTy) const = 0;
1371  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1372  ReductionFlags) const = 0;
1373  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1374  virtual unsigned getGISelRematGlobalCost() const = 0;
1375  virtual int getInstructionLatency(const Instruction *I) = 0;
1376 };
1377 
1378 template <typename T>
1379 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1380  T Impl;
1381 
1382 public:
1383  Model(T Impl) : Impl(std::move(Impl)) {}
1384  ~Model() override {}
1385 
1386  const DataLayout &getDataLayout() const override {
1387  return Impl.getDataLayout();
1388  }
1389 
1390  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1391  return Impl.getOperationCost(Opcode, Ty, OpTy);
1392  }
1393  int getGEPCost(Type *PointeeType, const Value *Ptr,
1394  ArrayRef<const Value *> Operands) override {
1395  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1396  }
1397  int getExtCost(const Instruction *I, const Value *Src) override {
1398  return Impl.getExtCost(I, Src);
1399  }
1400  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
1401  return Impl.getCallCost(FTy, NumArgs, U);
1402  }
1403  int getCallCost(const Function *F, int NumArgs, const User *U) override {
1404  return Impl.getCallCost(F, NumArgs, U);
1405  }
1406  int getCallCost(const Function *F,
1407  ArrayRef<const Value *> Arguments, const User *U) override {
1408  return Impl.getCallCost(F, Arguments, U);
1409  }
1410  unsigned getInliningThresholdMultiplier() override {
1411  return Impl.getInliningThresholdMultiplier();
1412  }
1413  int getInlinerVectorBonusPercent() override {
1414  return Impl.getInlinerVectorBonusPercent();
1415  }
1416  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1417  ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
1418  return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
1419  }
1420  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1421  ArrayRef<const Value *> Arguments,
1422  const User *U = nullptr) override {
1423  return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
1424  }
1425  int getMemcpyCost(const Instruction *I) override {
1426  return Impl.getMemcpyCost(I);
1427  }
1428  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1429  return Impl.getUserCost(U, Operands);
1430  }
1431  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1432  bool isSourceOfDivergence(const Value *V) override {
1433  return Impl.isSourceOfDivergence(V);
1434  }
1435 
1436  bool isAlwaysUniform(const Value *V) override {
1437  return Impl.isAlwaysUniform(V);
1438  }
1439 
1440  unsigned getFlatAddressSpace() override {
1441  return Impl.getFlatAddressSpace();
1442  }
1443 
1444  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1445  Intrinsic::ID IID) const override {
1446  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1447  }
1448 
1449  bool rewriteIntrinsicWithAddressSpace(
1450  IntrinsicInst *II, Value *OldV, Value *NewV) const override {
1451  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1452  }
1453 
1454  bool isLoweredToCall(const Function *F) override {
1455  return Impl.isLoweredToCall(F);
1456  }
1457  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1458  UnrollingPreferences &UP) override {
1459  return Impl.getUnrollingPreferences(L, SE, UP);
1460  }
1461  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1462  AssumptionCache &AC,
1463  TargetLibraryInfo *LibInfo,
1464  HardwareLoopInfo &HWLoopInfo) override {
1465  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1466  }
1467  bool isLegalAddImmediate(int64_t Imm) override {
1468  return Impl.isLegalAddImmediate(Imm);
1469  }
1470  bool isLegalICmpImmediate(int64_t Imm) override {
1471  return Impl.isLegalICmpImmediate(Imm);
1472  }
1473  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1474  bool HasBaseReg, int64_t Scale,
1475  unsigned AddrSpace,
1476  Instruction *I) override {
1477  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1478  Scale, AddrSpace, I);
1479  }
1480  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1481  TargetTransformInfo::LSRCost &C2) override {
1482  return Impl.isLSRCostLess(C1, C2);
1483  }
1484  bool canMacroFuseCmp() override {
1485  return Impl.canMacroFuseCmp();
1486  }
1487  bool canSaveCmp(Loop *L, BranchInst **BI,
1488  ScalarEvolution *SE,
1489  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1490  TargetLibraryInfo *LibInfo) override {
1491  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1492  }
1493  bool shouldFavorPostInc() const override {
1494  return Impl.shouldFavorPostInc();
1495  }
1496  bool shouldFavorBackedgeIndex(const Loop *L) const override {
1497  return Impl.shouldFavorBackedgeIndex(L);
1498  }
1499  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override {
1500  return Impl.isLegalMaskedStore(DataType, Alignment);
1501  }
1502  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override {
1503  return Impl.isLegalMaskedLoad(DataType, Alignment);
1504  }
1505  bool isLegalNTStore(Type *DataType, Align Alignment) override {
1506  return Impl.isLegalNTStore(DataType, Alignment);
1507  }
1508  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
1509  return Impl.isLegalNTLoad(DataType, Alignment);
1510  }
1511  bool isLegalMaskedScatter(Type *DataType) override {
1512  return Impl.isLegalMaskedScatter(DataType);
1513  }
1514  bool isLegalMaskedGather(Type *DataType) override {
1515  return Impl.isLegalMaskedGather(DataType);
1516  }
1517  bool isLegalMaskedCompressStore(Type *DataType) override {
1518  return Impl.isLegalMaskedCompressStore(DataType);
1519  }
1520  bool isLegalMaskedExpandLoad(Type *DataType) override {
1521  return Impl.isLegalMaskedExpandLoad(DataType);
1522  }
1523  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1524  return Impl.hasDivRemOp(DataType, IsSigned);
1525  }
1526  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1527  return Impl.hasVolatileVariant(I, AddrSpace);
1528  }
1529  bool prefersVectorizedAddressing() override {
1530  return Impl.prefersVectorizedAddressing();
1531  }
1532  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1533  bool HasBaseReg, int64_t Scale,
1534  unsigned AddrSpace) override {
1535  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1536  Scale, AddrSpace);
1537  }
1538  bool LSRWithInstrQueries() override {
1539  return Impl.LSRWithInstrQueries();
1540  }
1541  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1542  return Impl.isTruncateFree(Ty1, Ty2);
1543  }
1544  bool isProfitableToHoist(Instruction *I) override {
1545  return Impl.isProfitableToHoist(I);
1546  }
1547  bool useAA() override { return Impl.useAA(); }
1548  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1549  bool shouldBuildLookupTables() override {
1550  return Impl.shouldBuildLookupTables();
1551  }
1552  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1553  return Impl.shouldBuildLookupTablesForConstant(C);
1554  }
1555  bool useColdCCForColdCall(Function &F) override {
1556  return Impl.useColdCCForColdCall(F);
1557  }
1558 
1559  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1560  bool Extract) override {
1561  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1562  }
1563  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1564  unsigned VF) override {
1565  return Impl.getOperandsScalarizationOverhead(Args, VF);
1566  }
1567 
1568  bool supportsEfficientVectorElementLoadStore() override {
1569  return Impl.supportsEfficientVectorElementLoadStore();
1570  }
1571 
1572  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1573  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1574  }
1575  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
1576  bool IsZeroCmp) const override {
1577  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
1578  }
1579  bool enableInterleavedAccessVectorization() override {
1580  return Impl.enableInterleavedAccessVectorization();
1581  }
1582  bool enableMaskedInterleavedAccessVectorization() override {
1583  return Impl.enableMaskedInterleavedAccessVectorization();
1584  }
1585  bool isFPVectorizationPotentiallyUnsafe() override {
1586  return Impl.isFPVectorizationPotentiallyUnsafe();
1587  }
1588  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1589  unsigned BitWidth, unsigned AddressSpace,
1590  unsigned Alignment, bool *Fast) override {
1591  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1592  Alignment, Fast);
1593  }
1594  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1595  return Impl.getPopcntSupport(IntTyWidthInBit);
1596  }
1597  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1598 
1599  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1600  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1601  }
1602 
1603  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1604 
1605  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1606  Type *Ty) override {
1607  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1608  }
1609  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1610  return Impl.getIntImmCost(Imm, Ty);
1611  }
1612  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1613  Type *Ty) override {
1614  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1615  }
1616  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1617  Type *Ty) override {
1618  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1619  }
1620  unsigned getNumberOfRegisters(unsigned ClassID) const override {
1621  return Impl.getNumberOfRegisters(ClassID);
1622  }
1623  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override {
1624  return Impl.getRegisterClassForType(Vector, Ty);
1625  }
1626  const char* getRegisterClassName(unsigned ClassID) const override {
1627  return Impl.getRegisterClassName(ClassID);
1628  }
1629  unsigned getRegisterBitWidth(bool Vector) const override {
1630  return Impl.getRegisterBitWidth(Vector);
1631  }
1632  unsigned getMinVectorRegisterBitWidth() override {
1633  return Impl.getMinVectorRegisterBitWidth();
1634  }
1635  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1636  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1637  }
1638  unsigned getMinimumVF(unsigned ElemWidth) const override {
1639  return Impl.getMinimumVF(ElemWidth);
1640  }
1641  bool shouldConsiderAddressTypePromotion(
1642  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1643  return Impl.shouldConsiderAddressTypePromotion(
1644  I, AllowPromotionWithoutCommonHeader);
1645  }
1646  unsigned getCacheLineSize() const override {
1647  return Impl.getCacheLineSize();
1648  }
1649  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override {
1650  return Impl.getCacheSize(Level);
1651  }
1652  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
1653  return Impl.getCacheAssociativity(Level);
1654  }
1655 
1656  /// Return the preferred prefetch distance in terms of instructions.
1657  ///
1658  unsigned getPrefetchDistance() const override {
1659  return Impl.getPrefetchDistance();
1660  }
1661 
1662  /// Return the minimum stride necessary to trigger software
1663  /// prefetching.
1664  ///
1665  unsigned getMinPrefetchStride() const override {
1666  return Impl.getMinPrefetchStride();
1667  }
1668 
1669  /// Return the maximum prefetch distance in terms of loop
1670  /// iterations.
1671  ///
1672  unsigned getMaxPrefetchIterationsAhead() const override {
1673  return Impl.getMaxPrefetchIterationsAhead();
1674  }
1675 
1676  unsigned getMaxInterleaveFactor(unsigned VF) override {
1677  return Impl.getMaxInterleaveFactor(VF);
1678  }
1679  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1680  unsigned &JTSize) override {
1681  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1682  }
1683  unsigned
1684  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1685  OperandValueKind Opd2Info,
1686  OperandValueProperties Opd1PropInfo,
1687  OperandValueProperties Opd2PropInfo,
1688  ArrayRef<const Value *> Args) override {
1689  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1690  Opd1PropInfo, Opd2PropInfo, Args);
1691  }
1692  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1693  Type *SubTp) override {
1694  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1695  }
1696  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1697  const Instruction *I) override {
1698  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1699  }
1700  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1701  unsigned Index) override {
1702  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1703  }
1704  int getCFInstrCost(unsigned Opcode) override {
1705  return Impl.getCFInstrCost(Opcode);
1706  }
1707  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1708  const Instruction *I) override {
1709  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1710  }
1711  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1712  return Impl.getVectorInstrCost(Opcode, Val, Index);
1713  }
1714  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1715  unsigned AddressSpace, const Instruction *I) override {
1716  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1717  }
1718  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1719  unsigned AddressSpace) override {
1720  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1721  }
1722  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1723  Value *Ptr, bool VariableMask,
1724  unsigned Alignment) override {
1725  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1726  Alignment);
1727  }
1728  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1729  ArrayRef<unsigned> Indices, unsigned Alignment,
1730  unsigned AddressSpace, bool UseMaskForCond,
1731  bool UseMaskForGaps) override {
1732  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1733  Alignment, AddressSpace,
1734  UseMaskForCond, UseMaskForGaps);
1735  }
1736  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1737  bool IsPairwiseForm) override {
1738  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1739  }
1740  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1741  bool IsPairwiseForm, bool IsUnsigned) override {
1742  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1743  }
1744  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1745  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1746  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1747  ScalarizationCostPassed);
1748  }
1749  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1750  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1751  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1752  }
1753  int getCallInstrCost(Function *F, Type *RetTy,
1754  ArrayRef<Type *> Tys) override {
1755  return Impl.getCallInstrCost(F, RetTy, Tys);
1756  }
1757  unsigned getNumberOfParts(Type *Tp) override {
1758  return Impl.getNumberOfParts(Tp);
1759  }
1760  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1761  const SCEV *Ptr) override {
1762  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1763  }
1764  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1765  return Impl.getCostOfKeepingLiveOverCall(Tys);
1766  }
1767  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1768  MemIntrinsicInfo &Info) override {
1769  return Impl.getTgtMemIntrinsic(Inst, Info);
1770  }
1771  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1772  return Impl.getAtomicMemIntrinsicMaxElementSize();
1773  }
1774  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1775  Type *ExpectedType) override {
1776  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1777  }
1778  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1779  unsigned SrcAlign,
1780  unsigned DestAlign) const override {
1781  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1782  }
1783  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1784  LLVMContext &Context,
1785  unsigned RemainingBytes,
1786  unsigned SrcAlign,
1787  unsigned DestAlign) const override {
1788  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1789  SrcAlign, DestAlign);
1790  }
1791  bool areInlineCompatible(const Function *Caller,
1792  const Function *Callee) const override {
1793  return Impl.areInlineCompatible(Caller, Callee);
1794  }
1795  bool areFunctionArgsABICompatible(
1796  const Function *Caller, const Function *Callee,
1797  SmallPtrSetImpl<Argument *> &Args) const override {
1798  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1799  }
1800  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1801  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1802  }
1803  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1804  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1805  }
1806  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1807  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1808  }
1809  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1810  return Impl.isLegalToVectorizeLoad(LI);
1811  }
1812  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1813  return Impl.isLegalToVectorizeStore(SI);
1814  }
1815  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1816  unsigned Alignment,
1817  unsigned AddrSpace) const override {
1818  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1819  AddrSpace);
1820  }
1821  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1822  unsigned Alignment,
1823  unsigned AddrSpace) const override {
1824  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1825  AddrSpace);
1826  }
1827  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1828  unsigned ChainSizeInBytes,
1829  VectorType *VecTy) const override {
1830  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1831  }
1832  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1833  unsigned ChainSizeInBytes,
1834  VectorType *VecTy) const override {
1835  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1836  }
1837  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1838  ReductionFlags Flags) const override {
1839  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1840  }
1841  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1842  return Impl.shouldExpandReduction(II);
1843  }
1844 
1845  unsigned getGISelRematGlobalCost() const override {
1846  return Impl.getGISelRematGlobalCost();
1847  }
1848 
1849  int getInstructionLatency(const Instruction *I) override {
1850  return Impl.getInstructionLatency(I);
1851  }
1852 };
1853 
1854 template <typename T>
1855 TargetTransformInfo::TargetTransformInfo(T Impl)
1856  : TTIImpl(new Model<T>(Impl)) {}
1857 
1858 /// Analysis pass providing the \c TargetTransformInfo.
1859 ///
1860 /// The core idea of the TargetIRAnalysis is to expose an interface through
1861 /// which LLVM targets can analyze and provide information about the middle
1862 /// end's target-independent IR. This supports use cases such as target-aware
1863 /// cost modeling of IR constructs.
1864 ///
1865 /// This is a function analysis because much of the cost modeling for targets
1866 /// is done in a subtarget specific way and LLVM supports compiling different
1867 /// functions targeting different subtargets in order to support runtime
1868 /// dispatch according to the observed subtarget.
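///
/// For illustration only, a minimal sketch of querying the result from a
/// new-pass-manager function pass (MyPass is a hypothetical pass name):
///
/// \code
///   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &FAM) {
///     const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///     // ... consult TTI cost hooks to drive the transformation ...
///     return PreservedAnalyses::all();
///   }
/// \endcode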
1869 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1870 public:
1871  typedef TargetTransformInfo Result;
1872 
1873  /// Default construct a target IR analysis.
1874  ///
1875  /// This will use the module's datalayout to construct a baseline
1876  /// conservative TTI result.
1877  TargetIRAnalysis();
1878 
1879  /// Construct an IR analysis pass around a target-provided callback.
1880  ///
1881  /// The callback will be called with a particular function for which the TTI
1882  /// is needed and must return a TTI object for that function.
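  ///
  /// A minimal sketch of wiring the callback to a target implementation
  /// (MyTargetTTIImpl is hypothetical, not part of this header):
  ///
  /// \code
  ///   TargetIRAnalysis TIRA([&](const Function &F) {
  ///     return TargetTransformInfo(MyTargetTTIImpl(F));
  ///   });
  /// \endcode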
1883  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1884 
1885  // Value semantics. We spell out the constructors for MSVC.
1886  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1887  : TTICallback(Arg.TTICallback) {}
1888  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1889  : TTICallback(std::move(Arg.TTICallback)) {}
1890  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1891  TTICallback = RHS.TTICallback;
1892  return *this;
1893  }
1894  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1895  TTICallback = std::move(RHS.TTICallback);
1896  return *this;
1897  }
1898 
1899  Result run(const Function &F, FunctionAnalysisManager &);
1900 
1901 private:
1902  friend AnalysisInfoMixin<TargetIRAnalysis>;
1903  static AnalysisKey Key;
1904 
1905  /// The callback used to produce a result.
1906  ///
1907  /// We use a completely opaque callback so that targets can provide whatever
1908  /// mechanism they desire for constructing the TTI for a given function.
1909  ///
1910  /// FIXME: Should we really use std::function? It's relatively inefficient.
1911  /// It might be possible to arrange for even stateful callbacks to outlive
1912  /// the analysis and thus use a function_ref which would be lighter weight.
1913  /// This may also be less error prone as the callback is likely to reference
1914  /// the external TargetMachine, and that reference needs to never dangle.
1915  std::function<Result(const Function &)> TTICallback;
1916 
1917  /// Helper function used as the callback in the default constructor.
1918  static Result getDefaultTTI(const Function &F);
1919 };
1920 
1921 /// Wrapper pass for TargetTransformInfo.
1922 ///
1923 /// This pass can be constructed from a TTI object, which it stores internally
1924 /// and makes available to the passes that query it.
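///
/// For illustration only, the typical legacy-pass-manager access pattern from
/// within a pass (after declaring the dependency in getAnalysisUsage with
/// AU.addRequired<TargetTransformInfoWrapperPass>()):
///
/// \code
///   TargetTransformInfo &TTI =
///       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
/// \endcode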
1925 class TargetTransformInfoWrapperPass : public ImmutablePass {
1926  TargetIRAnalysis TIRA;
1927  Optional<TargetTransformInfo> TTI;
1928 
1929  virtual void anchor();
1930 
1931 public:
1932  static char ID;
1933 
1934  /// We must provide a default constructor for the pass but it should
1935  /// never be used.
1936  ///
1937  /// Use the constructor below or call one of the creation routines.
1938  TargetTransformInfoWrapperPass();
1939 
1940  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1941 
1942  TargetTransformInfo &getTTI(const Function &F);
1943 };
1944 
1945 /// Create an analysis pass wrapper around a TTI object.
1946 ///
1947 /// This analysis pass just holds the TTI instance and makes it available to
1948 /// clients.
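///
/// A sketch of how a driver might register it with the legacy PassManager,
/// assuming TM is a TargetMachine that provides the TTI callback:
///
/// \code
///   legacy::PassManager PM;
///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
/// \endcode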
1949 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1950 
1951 } // End llvm namespace
1952 
1953 #endif