LLVM 20.0.0git
TargetTransformInfo.h
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/IR/FMF.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
30#include "llvm/Support/AtomicOrdering.h"
31#include "llvm/Support/BranchProbability.h"
32#include "llvm/Support/InstructionCost.h"
33#include <functional>
34#include <optional>
35#include <utility>
36
37namespace llvm {
38
39namespace Intrinsic {
40typedef unsigned ID;
41}
42
43class AllocaInst;
44class AssumptionCache;
45class BlockFrequencyInfo;
46class DominatorTree;
47class BranchInst;
48class Function;
49class GlobalValue;
50class InstCombiner;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
53class IntrinsicInst;
54class LoadInst;
55class Loop;
56class LoopInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
60class SCEV;
61class ScalarEvolution;
62class SmallBitVector;
63class StoreInst;
64class SwitchInst;
65class TargetLibraryInfo;
66class Type;
67class VPIntrinsic;
68struct KnownBits;
69
70/// Information about a load/store intrinsic defined by the target.
71struct MemIntrinsicInfo {
72 /// This is the pointer that the intrinsic is loading from or storing to.
73 /// If this is non-null, then analysis/optimization passes can assume that
74 /// this intrinsic is functionally equivalent to a load/store from this
75 /// pointer.
76 Value *PtrVal = nullptr;
77
78 // Ordering for atomic operations.
79 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
80
81 // Same Id is set by the target for corresponding load/store intrinsics.
82 unsigned short MatchingId = 0;
83
84 bool ReadMem = false;
85 bool WriteMem = false;
86 bool IsVolatile = false;
87
88 bool isUnordered() const {
89 return (Ordering == AtomicOrdering::NotAtomic ||
90 Ordering == AtomicOrdering::Unordered) &&
91 !IsVolatile;
92 }
93};
94
95/// Attributes of a target dependent hardware loop.
96struct HardwareLoopInfo {
97 HardwareLoopInfo() = delete;
98 HardwareLoopInfo(Loop *L);
99 Loop *L = nullptr;
100 BasicBlock *ExitBlock = nullptr;
101 BranchInst *ExitBranch = nullptr;
102 const SCEV *ExitCount = nullptr;
103 IntegerType *CountType = nullptr;
104 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
105 // value in every iteration.
106 bool IsNestingLegal = false; // Can a hardware loop be a parent to
107 // another hardware loop?
108 bool CounterInReg = false; // Should loop counter be updated in
109 // the loop via a phi?
110 bool PerformEntryTest = false; // Generate the intrinsic which also performs
111 // icmp ne zero on the loop counter value and
112 // produces an i1 to guard the loop entry.
113 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
114 DominatorTree &DT, bool ForceNestedLoop = false,
115 bool ForceHardwareLoopPHI = false);
116 bool canAnalyze(LoopInfo &LI);
117};
118
119class IntrinsicCostAttributes {
120 const IntrinsicInst *II = nullptr;
121 Type *RetTy = nullptr;
122 Intrinsic::ID IID;
123 SmallVector<Type *, 4> ParamTys;
124 SmallVector<const Value *, 4> Arguments;
125 FastMathFlags FMF;
126 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
127 // arguments and the return value will be computed based on types.
128 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129
130public:
131 IntrinsicCostAttributes(
132 Intrinsic::ID Id, const CallBase &CI,
133 InstructionCost ScalarCost = InstructionCost::getInvalid(),
134 bool TypeBasedOnly = false);
135
136 IntrinsicCostAttributes(
137 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
138 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
139 InstructionCost ScalarCost = InstructionCost::getInvalid());
140
141 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
142 ArrayRef<const Value *> Args);
143
144 IntrinsicCostAttributes(
145 Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
146 ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
147 const IntrinsicInst *I = nullptr,
148 InstructionCost ScalarCost = InstructionCost::getInvalid());
149
150 Intrinsic::ID getID() const { return IID; }
151 const IntrinsicInst *getInst() const { return II; }
152 Type *getReturnType() const { return RetTy; }
153 FastMathFlags getFlags() const { return FMF; }
154 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
155 const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
156 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
157
158 bool isTypeBasedOnly() const {
159 return Arguments.empty();
160 }
161
162 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
163};
164
165enum class TailFoldingStyle {
166 /// Don't use tail folding
167 None,
168 /// Use predicate only to mask operations on data in the loop.
169 /// When the VL is not known to be a power-of-2, this method requires a
170 /// runtime overflow check for the i + VL in the loop because it compares the
171 /// scalar induction variable against the tripcount rounded up by VL which may
172 /// overflow. When the VL is a power-of-2, both the increment and uprounded
173 /// tripcount will overflow to 0, which does not require a runtime check
174 /// since the loop is exited when the loop induction variable equals the
175 /// uprounded trip-count, which are both 0.
176 Data,
177 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
178 /// calculate the mask and instead implements this with a
179 /// splat/stepvector/cmp.
180 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
181 /// active.lane.mask intrinsic when it is not natively supported?
182 DataWithoutLaneMask,
183 /// Use predicate to control both data and control flow.
184 /// This method always requires a runtime overflow check for the i + VL
185 /// increment inside the loop, because it uses the result directly in the
186 /// active.lane.mask to calculate the mask for the next iteration. If the
187 /// increment overflows, the mask is no longer correct.
188 DataAndControlFlow,
189 /// Use predicate to control both data and control flow, but modify
190 /// the trip count so that a runtime overflow check can be avoided
191 /// and such that the scalar epilogue loop can always be removed.
192 DataAndControlFlowWithoutRuntimeCheck,
193 /// Use predicated EVL instructions for tail-folding.
194 /// Indicates that VP intrinsics should be used.
195 DataWithEVL,
196};
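// Editorial example (not part of the original header): a minimal sketch of how
// a vectorization client might consume this enum via the
// getPreferredTailFoldingStyle() hook declared later in this file. Assumes a
// populated TargetTransformInfo reference named TTI.
//   TailFoldingStyle Style =
//       TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true);
//   bool FoldTail = Style != TailFoldingStyle::None;
//   bool UseActiveLaneMask =
//       FoldTail && Style != TailFoldingStyle::DataWithoutLaneMask;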
197
198struct TailFoldingInfo {
199 TargetLibraryInfo *TLI;
200 LoopVectorizationLegality *LVL;
201 InterleavedAccessInfo *IAI;
202 TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
203 InterleavedAccessInfo *IAI)
204 : TLI(TLI), LVL(LVL), IAI(IAI) {}
205};
206
207class TargetTransformInfo;
208typedef TargetTransformInfo TTI;
209
210/// This pass provides access to the codegen interfaces that are needed
211/// for IR-level transformations.
212class TargetTransformInfo {
213public:
214 /// Construct a TTI object using a type implementing the \c Concept
215 /// API below.
216 ///
217 /// This is used by targets to construct a TTI wrapping their target-specific
218 /// implementation that encodes appropriate costs for their target.
219 template <typename T> TargetTransformInfo(T Impl);
220
221 /// Construct a baseline TTI object using a minimal implementation of
222 /// the \c Concept API below.
223 ///
224 /// The TTI implementation will reflect the information in the DataLayout
225 /// provided if non-null.
226 explicit TargetTransformInfo(const DataLayout &DL);
227
228 // Provide move semantics.
229 TargetTransformInfo(TargetTransformInfo &&Arg);
230 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
231
232 // We need to define the destructor out-of-line to define our sub-classes
233 // out-of-line.
234 ~TargetTransformInfo();
235
236 /// Handle the invalidation of this information.
237 ///
238 /// When used as a result of \c TargetIRAnalysis this method will be called
239 /// when the function this was computed for changes. When it returns false,
240 /// the information is preserved across those changes.
241 bool invalidate(Function &, const PreservedAnalyses &,
242 FunctionAnalysisManager::Invalidator &) {
243 // FIXME: We should probably in some way ensure that the subtarget
244 // information for a function hasn't changed.
245 return false;
246 }
247
248 /// \name Generic Target Information
249 /// @{
250
251 /// The kind of cost model.
252 ///
253 /// There are several different cost models that can be customized by the
254 /// target. The normalization of each cost model may be target specific.
255 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
256 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
257 enum TargetCostKind {
258 TCK_RecipThroughput, ///< Reciprocal throughput.
259 TCK_Latency, ///< The latency of instruction.
260 TCK_CodeSize, ///< Instruction code size.
261 TCK_SizeAndLatency ///< The weighted sum of size and latency.
262 };
263
264 /// Underlying constants for 'cost' values in this interface.
265 ///
266 /// Many APIs in this interface return a cost. This enum defines the
267 /// fundamental values that should be used to interpret (and produce) those
268 /// costs. The costs are returned as an int rather than a member of this
269 /// enumeration because it is expected that the cost of one IR instruction
270 /// may have a multiplicative factor to it or otherwise won't fit directly
271 /// into the enum. Moreover, it is common to sum or average costs which works
272 /// better as simple integral values. Thus this enum only provides constants.
273 /// Also note that the returned costs are signed integers to make it natural
274 /// to add, subtract, and test with zero (a common boundary condition). It is
275 /// not expected that 2^32 is a realistic cost to be modeling at any point.
276 ///
277 /// Note that these costs should usually reflect the intersection of code-size
278 /// cost and execution cost. A free instruction is typically one that folds
279 /// into another instruction. For example, reg-to-reg moves can often be
280 /// skipped by renaming the registers in the CPU, but they still are encoded
281 /// and thus wouldn't be considered 'free' here.
282 enum TargetCostConstants {
283 TCC_Free = 0, ///< Expected to fold away in lowering.
284 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
285 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
286 };
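// Editorial example (not part of the original header): the constants above are
// meant to be summed as InstructionCost values rather than compared as raw
// enumerators. A minimal sketch:
//   InstructionCost Cost = 2 * TargetTransformInfo::TCC_Basic; // two adds
//   Cost += TargetTransformInfo::TCC_Expensive;                // one divide
//   bool CheapEnough = Cost <= 2 * TargetTransformInfo::TCC_Expensive;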
287
288 /// Estimate the cost of a GEP operation when lowered.
289 ///
290 /// \p PointeeType is the source element type of the GEP.
291 /// \p Ptr is the base pointer operand.
292 /// \p Operands is the list of indices following the base pointer.
293 ///
294 /// \p AccessType is a hint as to what type of memory might be accessed by
295 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
296 /// folded into the addressing mode of a load/store. If AccessType is null,
297 /// then the resulting target type based off of PointeeType will be used as an
298 /// approximation.
299 InstructionCost
300 getGEPCost(Type *PointeeType, const Value *Ptr,
301 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
302 TargetCostKind CostKind = TCK_SizeAndLatency) const;
303
304 /// Describe known properties for a set of pointers.
305 struct PointersChainInfo {
306 /// All the GEPs in a set have the same base address.
307 unsigned IsSameBaseAddress : 1;
308 /// These properties are only valid if SameBaseAddress is set.
309 /// True if all pointers are separated by a unit stride.
310 unsigned IsUnitStride : 1;
311 /// True if the distance between any two neighbouring pointers is a known value.
312 unsigned IsKnownStride : 1;
313 unsigned Reserved : 29;
314
315 bool isSameBase() const { return IsSameBaseAddress; }
316 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
317 bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
318
319 static PointersChainInfo getUnitStride() {
320 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
321 /*IsKnownStride=*/1, 0};
322 }
323 static PointersChainInfo getKnownStride() {
324 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
325 /*IsKnownStride=*/1, 0};
326 }
327 static PointersChainInfo getUnknownStride() {
328 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
329 /*IsKnownStride=*/0, 0};
330 }
331 };
332 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
333
334 /// Estimate the cost of a chain of pointers (typically the pointer operands
335 /// of a chain of loads or stores within the same block) when lowered.
336 /// \p AccessTy is the type of the loads/stores that will ultimately use the
337 /// \p Ptrs.
338 InstructionCost
339 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
340 const PointersChainInfo &Info, Type *AccessTy,
341 TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
342
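// Editorial example (not part of the original header): costing the pointer
// operands of a chain of consecutive loads. Assumes a TargetTransformInfo
// reference TTI, the pointers in Ptrs with common base Base, and the loaded
// type in AccessTy.
//   InstructionCost ChainCost = TTI.getPointersChainCost(
//       Ptrs, Base, TargetTransformInfo::PointersChainInfo::getUnitStride(),
//       AccessTy, TargetTransformInfo::TCK_RecipThroughput);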
343 /// \returns A value by which our inlining threshold should be multiplied.
344 /// This is primarily used to bump up the inlining threshold wholesale on
345 /// targets where calls are unusually expensive.
346 ///
347 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
348 /// individual classes of instructions would be better.
349 unsigned getInliningThresholdMultiplier() const;
350
351 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
352 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
353
354 /// \returns The bonus of inlining the last call to a static function.
355 int getInliningLastCallToStaticBonus() const;
356
357 /// \returns A value to be added to the inlining threshold.
358 unsigned adjustInliningThreshold(const CallBase *CB) const;
359
360 /// \returns The cost of having an Alloca in the caller if not inlined, to be
361 /// added to the threshold
362 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
363
364 /// \returns Vector bonus in percent.
365 ///
366 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
367 /// and apply this bonus based on the percentage of vector instructions. A
368 /// bonus is applied if the vector instructions exceed 50% and half that
369 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
370 /// arbitrary and evolved over time by accident as much as because they are
371 /// principled bonuses.
372 /// FIXME: It would be nice to base the bonus values on something more
373 /// scientific. A target may have no bonus on vector instructions.
374 int getInlinerVectorBonusPercent() const;
375
376 /// \return the expected cost of a memcpy, which could e.g. depend on the
377 /// source/destination type and alignment and the number of bytes copied.
378 InstructionCost getMemcpyCost(const Instruction *I) const;
379
380 /// Returns the maximum memset / memcpy size in bytes that still makes it
381 /// profitable to inline the call.
382 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const;
383
384 /// \return The estimated number of case clusters when lowering \p 'SI'.
385 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
386 /// table.
387 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
388 unsigned &JTSize,
389 ProfileSummaryInfo *PSI,
390 BlockFrequencyInfo *BFI) const;
391
392 /// Estimate the cost of a given IR user when lowered.
393 ///
394 /// This can estimate the cost of either a ConstantExpr or Instruction when
395 /// lowered.
396 ///
397 /// \p Operands is a list of operands which can be a result of transformations
398 /// of the current operands. The number of operands on the list must be equal
399 /// to the number of the current operands the IR user has. Their order on the
400 /// list must be the same as the order of the current operands the IR user
401 /// has.
402 ///
403 /// The returned cost is defined in terms of \c TargetCostConstants, see its
404 /// comments for a detailed explanation of the cost values.
405 InstructionCost getInstructionCost(const User *U,
406 ArrayRef<const Value *> Operands,
407 TargetCostKind CostKind) const;
408
409 /// This is a helper function which calls the three-argument
410 /// getInstructionCost with \p Operands which are the current operands U has.
411 InstructionCost getInstructionCost(const User *U,
412 TargetCostKind CostKind) const {
413 SmallVector<const Value *, 4> Operands(U->operand_values());
414 return getInstructionCost(U, Operands, CostKind);
415 }
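// Editorial example (not part of the original header): the two-argument helper
// above is the usual way for IR passes to size a region of code. Assumes a
// TargetTransformInfo reference TTI and a Function F.
//   InstructionCost FnCodeSize = 0;
//   for (const BasicBlock &BB : F)
//     for (const Instruction &I : BB)
//       FnCodeSize +=
//           TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);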
416
417 /// If a branch or a select condition is skewed in one direction by more than
418 /// this factor, it is very likely to be predicted correctly.
419 BranchProbability getPredictableBranchThreshold() const;
420
421 /// Returns estimated penalty of a branch misprediction in latency. Indicates
422 /// how aggressive the target wants for eliminating unpredictable branches. A
423 /// zero return value means extra optimization applied to them should be
424 /// minimal.
425 InstructionCost getBranchMispredictPenalty() const;
426
427 /// Return true if branch divergence exists.
428 ///
429 /// Branch divergence has a significantly negative impact on GPU performance
430 /// when threads in the same wavefront take different paths due to conditional
431 /// branches.
432 ///
433 /// If \p F is passed, provides a context function. If \p F is known to only
434 /// execute in a single threaded environment, the target may choose to skip
435 /// uniformity analysis and assume all values are uniform.
436 bool hasBranchDivergence(const Function *F = nullptr) const;
437
438 /// Returns whether V is a source of divergence.
439 ///
440 /// This function provides the target-dependent information for
441 /// the target-independent UniformityAnalysis.
442 bool isSourceOfDivergence(const Value *V) const;
443
444 // Returns true for the target-specific set of operations
445 // which produce a uniform result even when taking
446 // non-uniform arguments.
447 bool isAlwaysUniform(const Value *V) const;
448
449 /// Query the target whether the specified address space cast from FromAS to
450 /// ToAS is valid.
451 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
452
453 /// Return false if an \p AS0 address cannot possibly alias an \p AS1 address.
454 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
455
456 /// Returns the address space ID for a target's 'flat' address space. Note
457 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
458 /// refers to as the generic address space. The flat address space is a
459 /// generic address space that can be used to access multiple segments of memory
460 /// with different address spaces. Access of a memory location through a
461 /// pointer with this address space is expected to be legal but slower
462 /// compared to the same memory location accessed through a pointer with a
463 /// different address space.
464 //
465 /// This is for targets with different pointer representations which can
466 /// be converted with the addrspacecast instruction. If a pointer is converted
467 /// to this address space, optimizations should attempt to replace the access
468 /// with the source address space.
469 ///
470 /// \returns ~0u if the target does not have such a flat address space to
471 /// optimize away.
472 unsigned getFlatAddressSpace() const;
473
474 /// Return any intrinsic address operand indexes which may be rewritten if
475 /// they use a flat address space pointer.
476 ///
477 /// \returns true if the intrinsic was handled.
478 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
479 Intrinsic::ID IID) const;
480
481 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
482
483 /// Return true if globals in this address space can have initializers other
484 /// than `undef`.
485 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
486
487 unsigned getAssumedAddrSpace(const Value *V) const;
488
489 bool isSingleThreaded() const;
490
491 std::pair<const Value *, unsigned>
492 getPredicatedAddrSpace(const Value *V) const;
493
494 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
495 /// NewV, which has a different address space. This should happen for every
496 /// operand index that collectFlatAddressOperands returned for the intrinsic.
497 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
498 /// new value (which may be the original \p II with modified operands).
499 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
500 Value *NewV) const;
501
502 /// Test whether calls to a function lower to actual program function
503 /// calls.
504 ///
505 /// The idea is to test whether the program is likely to require a 'call'
506 /// instruction or equivalent in order to call the given function.
507 ///
508 /// FIXME: It's not clear that this is a good or useful query API. Clients
509 /// should probably move to simpler cost metrics using the above.
510 /// Alternatively, we could split the cost interface into distinct code-size
511 /// and execution-speed costs. This would allow modelling the core of this
512 /// query more accurately as a call is a single small instruction, but
513 /// incurs significant execution cost.
514 bool isLoweredToCall(const Function *F) const;
515
516 struct LSRCost {
517 /// TODO: Some of these could be merged. Also, a lexical ordering
518 /// isn't always optimal.
519 unsigned Insns;
520 unsigned NumRegs;
521 unsigned AddRecCost;
522 unsigned NumIVMuls;
523 unsigned NumBaseAdds;
524 unsigned ImmCost;
525 unsigned SetupCost;
526 unsigned ScaleCost;
527 };
528
529 /// Parameters that control the generic loop unrolling transformation.
530 struct UnrollingPreferences {
531 /// The cost threshold for the unrolled loop. Should be relative to the
532 /// getInstructionCost values returned by this API, and the expectation is
533 /// that the unrolled loop's instructions when run through that interface
534 /// should not exceed this cost. However, this is only an estimate. Also,
535 /// specific loops may be unrolled even with a cost above this threshold if
536 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
537 /// restriction.
538 unsigned Threshold;
539 /// If complete unrolling will reduce the cost of the loop, we will boost
540 /// the Threshold by a certain percent to allow more aggressive complete
541 /// unrolling. This value provides the maximum boost percentage that we
542 /// can apply to Threshold (The value should be no less than 100).
543 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
544 /// MaxPercentThresholdBoost / 100)
545 /// E.g. if complete unrolling reduces the loop execution time by 50%
546 /// then we boost the threshold by the factor of 2x. If unrolling is not
547 /// expected to reduce the running time, then we do not increase the
548 /// threshold.
549 unsigned MaxPercentThresholdBoost;
550 /// The cost threshold for the unrolled loop when optimizing for size (set
551 /// to UINT_MAX to disable).
552 unsigned OptSizeThreshold;
553 /// The cost threshold for the unrolled loop, like Threshold, but used
554 /// for partial/runtime unrolling (set to UINT_MAX to disable).
555 unsigned PartialThreshold;
556 /// The cost threshold for the unrolled loop when optimizing for size, like
557 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
558 /// UINT_MAX to disable).
559 unsigned PartialOptSizeThreshold;
560 /// A forced unrolling factor (the number of concatenated bodies of the
561 /// original loop in the unrolled loop body). When set to 0, the unrolling
562 /// transformation will select an unrolling factor based on the current cost
563 /// threshold and other factors.
564 unsigned Count;
565 /// Default unroll count for loops with run-time trip count.
566 unsigned DefaultUnrollRuntimeCount;
567 // Set the maximum unrolling factor. The unrolling factor may be selected
568 // using the appropriate cost threshold, but may not exceed this number
569 // (set to UINT_MAX to disable). This does not apply in cases where the
570 // loop is being fully unrolled.
571 unsigned MaxCount;
572 /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
573 /// to be overridden by a target gives more flexibility in certain cases.
574 /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
575 unsigned MaxUpperBound;
576 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
577 /// applies even if full unrolling is selected. This allows a target to fall
578 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
579 unsigned FullUnrollMaxCount;
580 // Represents the number of instructions optimized when the "back edge"
581 // becomes "fall through" in unrolled loop.
582 // For now we count a conditional branch on a backedge and a comparison
583 // feeding it.
584 unsigned BEInsns;
585 /// Allow partial unrolling (unrolling of loops to expand the size of the
586 /// loop body, not only to eliminate small constant-trip-count loops).
587 bool Partial;
588 /// Allow runtime unrolling (unrolling of loops to expand the size of the
589 /// loop body even when the number of loop iterations is not known at
590 /// compile time).
591 bool Runtime;
592 /// Allow generation of a loop remainder (extra iterations after unroll).
593 bool AllowRemainder;
594 /// Allow emitting expensive instructions (such as divisions) when computing
595 /// the trip count of a loop for runtime unrolling.
596 bool AllowExpensiveTripCount;
597 /// Apply loop unroll on any kind of loop
598 /// (mainly to loops that fail runtime unrolling).
599 bool Force;
600 /// Allow using trip count upper bound to unroll loops.
601 bool UpperBound;
602 /// Allow unrolling of all the iterations of the runtime loop remainder.
603 bool UnrollRemainder;
604 /// Allow unroll and jam. Used to enable unroll and jam for the target.
605 bool UnrollAndJam;
606 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
607 /// value above is used during unroll and jam for the outer loop size.
608 /// This value is used in the same manner to limit the size of the inner
609 /// loop.
610 unsigned UnrollAndJamInnerLoopThreshold;
611 /// Don't allow loop unrolling to simulate more than this number of
612 /// iterations when checking full unroll profitability
613 unsigned MaxIterationsCountToAnalyze;
614 /// Don't disable runtime unroll for the loops which were vectorized.
615 bool UnrollVectorizedLoop = false;
616 /// Don't allow runtime unrolling if expanding the trip count takes more
617 /// than SCEVExpansionBudget.
618 unsigned SCEVExpansionBudget;
619 };
620
621 /// Get target-customized preferences for the generic loop unrolling
622 /// transformation. The caller will initialize UP with the current
623 /// target-independent defaults.
624 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
625 UnrollingPreferences &UP,
626 OptimizationRemarkEmitter *ORE) const;
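// Editorial example (not part of the original header): a hypothetical target
// implementation (MyTTIImpl is made up) adjusting the defaults the unroller
// passes in.
//   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
//                                           TTI::UnrollingPreferences &UP,
//                                           OptimizationRemarkEmitter *ORE) {
//     UP.Partial = true; // allow partial unrolling on this target
//     UP.Runtime = true; // allow runtime unrolling
//     UP.MaxCount = 4;   // but never unroll by more than a factor of 4
//   }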
627
628 /// Query the target whether it would be profitable to convert the given loop
629 /// into a hardware loop.
630 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
631 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
632 HardwareLoopInfo &HWLoopInfo) const;
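// Editorial example (not part of the original header): how this query combines
// with the HardwareLoopInfo struct defined earlier. Assumes L, SE, AC, TLI, LI
// and DT are available in the calling pass.
//   HardwareLoopInfo HWLoopInfo(L);
//   bool UseHWLoop =
//       TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT);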
633
634 // Query the target for which minimum vectorization factor epilogue
635 // vectorization should be considered.
636 unsigned getEpilogueVectorizationMinVF() const;
637
638 /// Query the target whether it would be preferred to create a predicated
639 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
640 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
641
642 /// Query the target what the preferred style of tail folding is.
643 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
644 /// may (or will never) overflow for the suggested VF/UF in the given loop.
645 /// Targets can use this information to select a more optimal tail folding
646 /// style. The value conservatively defaults to true, such that no assumptions
647 /// are made on overflow.
648 TailFoldingStyle
649 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
650
651 // Parameters that control the loop peeling transformation
652 struct PeelingPreferences {
653 /// A forced peeling factor (the number of bodies of the original loop
654 /// that should be peeled off before the loop body). When set to 0, a
655 /// peeling factor is selected based on profile information and other factors.
656 unsigned PeelCount;
657 /// Allow peeling off loop iterations.
658 bool AllowPeeling;
659 /// Allow peeling off loop iterations for loop nests.
660 bool AllowLoopNestsPeeling;
661 /// Allow peeling based on profile. Used to enable peeling off all
662 /// iterations based on the provided profile.
663 /// If the value is true the peeling cost model can decide to peel only
664 /// some iterations and in this case it will set this to false.
665 bool PeelProfiledIterations;
666 };
667
668 /// Get target-customized preferences for the generic loop peeling
669 /// transformation. The caller will initialize \p PP with the current
670 /// target-independent defaults with information from \p L and \p SE.
671 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
672 PeelingPreferences &PP) const;
673
674 /// Targets can implement their own combinations for target-specific
675 /// intrinsics. This function will be called from the InstCombine pass every
676 /// time a target-specific intrinsic is encountered.
677 ///
678 /// \returns std::nullopt to not do anything target specific or a value that
679 /// will be returned from the InstCombiner. It is possible to return null and
680 /// stop further processing of the intrinsic by returning nullptr.
681 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
682 IntrinsicInst & II) const;
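// Editorial example (not part of the original header): a hypothetical target
// hook (MyTTIImpl and Intrinsic::my_intrinsic are made up) that folds a
// target-specific intrinsic to its first argument.
//   std::optional<Instruction *>
//   MyTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
//     if (II.getIntrinsicID() == Intrinsic::my_intrinsic) // hypothetical ID
//       return IC.replaceInstUsesWith(II, II.getArgOperand(0));
//     return std::nullopt; // nothing target-specific to do
//   }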
683 /// Can be used to implement target-specific instruction combining.
684 /// \see instCombineIntrinsic
685 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
686 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
687 KnownBits & Known, bool &KnownBitsComputed) const;
688 /// Can be used to implement target-specific instruction combining.
689 /// \see instCombineIntrinsic
690 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
691 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
692 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
693 std::function<void(Instruction *, unsigned, APInt, APInt &)>
694 SimplifyAndSetOp) const;
695 /// @}
696
697 /// \name Scalar Target Information
698 /// @{
699
700 /// Flags indicating the kind of support for population count.
701 ///
702 /// Compared to the SW implementation, HW support is supposed to
703 /// significantly boost the performance when the population is dense, and it
704 /// may or may not degrade performance if the population is sparse. A HW
705 /// support is considered as "Fast" if it can outperform, or is on a par
706 /// with, SW implementation when the population is sparse; otherwise, it is
707 /// considered as "Slow".
708 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
709
710 /// Return true if the specified immediate is legal add immediate, that
711 /// is the target has add instructions which can add a register with the
712 /// immediate without having to materialize the immediate into a register.
713 bool isLegalAddImmediate(int64_t Imm) const;
714
715 /// Return true if adding the specified scalable immediate is legal, that is
716 /// the target has add instructions which can add a register with the
717 /// immediate (multiplied by vscale) without having to materialize the
718 /// immediate into a register.
719 bool isLegalAddScalableImmediate(int64_t Imm) const;
720
721 /// Return true if the specified immediate is legal icmp immediate,
722 /// that is the target has icmp instructions which can compare a register
723 /// against the immediate without having to materialize the immediate into a
724 /// register.
725 bool isLegalICmpImmediate(int64_t Imm) const;
726
727 /// Return true if the addressing mode represented by AM is legal for
728 /// this target, for a load/store of the specified type.
729 /// The type may be VoidTy, in which case only return true if the addressing
730 /// mode is legal for a load/store of any legal type.
731 /// If target returns true in LSRWithInstrQueries(), I may be valid.
732 /// \param ScalableOffset represents a quantity of bytes multiplied by vscale,
733 /// an invariant value known only at runtime. Most targets should not accept
734 /// a scalable offset.
735 ///
736 /// TODO: Handle pre/postinc as well.
737 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
738 bool HasBaseReg, int64_t Scale,
739 unsigned AddrSpace = 0, Instruction *I = nullptr,
740 int64_t ScalableOffset = 0) const;
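// Editorial example (not part of the original header): asking whether a
// "base register + 4 * index register" address is legal for an i32 access.
// Assumes a TargetTransformInfo reference TTI and an LLVMContext Ctx.
//   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
//                                          /*BaseGV=*/nullptr, /*BaseOffset=*/0,
//                                          /*HasBaseReg=*/true, /*Scale=*/4);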
741
742 /// Return true if LSR cost of C1 is lower than C2.
743 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
744 const TargetTransformInfo::LSRCost &C2) const;
745
746 /// Return true if LSR major cost is number of registers. Targets which
747 /// implement their own isLSRCostLess and unset number of registers as major
748 /// cost should return false, otherwise return true.
749 bool isNumRegsMajorCostOfLSR() const;
750
751 /// Return true if LSR should drop a found solution if it's calculated to be
752 /// less profitable than the baseline.
753 bool shouldDropLSRSolutionIfLessProfitable() const;
754
755 /// \returns true if LSR should not optimize a chain that includes \p I.
756 bool isProfitableLSRChainElement(Instruction *I) const;
757
758 /// Return true if the target can fuse a compare and branch.
759 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
760 /// calculation for the instructions in a loop.
761 bool canMacroFuseCmp() const;
762
763 /// Return true if the target can save a compare for loop count, for example
764 /// hardware loop saves a compare.
765 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
766 DominatorTree *DT, AssumptionCache *AC,
767 TargetLibraryInfo *LibInfo) const;
768
769 enum AddressingModeKind {
770 AMK_PreIndexed,
771 AMK_PostIndexed,
772 AMK_None
773 };
774
775 /// Return the preferred addressing mode LSR should make efforts to generate.
776 AddressingModeKind getPreferredAddressingMode(const Loop *L,
777 ScalarEvolution *SE) const;
778
779 /// Return true if the target supports masked store.
780 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
781 /// Return true if the target supports masked load.
782 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
783
784 /// Return true if the target supports nontemporal store.
785 bool isLegalNTStore(Type *DataType, Align Alignment) const;
786 /// Return true if the target supports nontemporal load.
787 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
788
789 /// \Returns true if the target supports broadcasting a load to a vector of
790 /// type <NumElements x ElementTy>.
791 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
792
793 /// Return true if the target supports masked scatter.
794 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
795 /// Return true if the target supports masked gather.
796 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
797 /// Return true if the target forces scalarizing of llvm.masked.gather
798 /// intrinsics.
799 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
800 /// Return true if the target forces scalarizing of llvm.masked.scatter
801 /// intrinsics.
802 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
803
804 /// Return true if the target supports masked compress store.
805 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const;
806 /// Return true if the target supports masked expand load.
807 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
808
809 /// Return true if the target supports strided load.
810 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
811
812 /// Return true if the target supports interleaved access for the given vector
813 /// type \p VTy, interleave factor \p Factor, alignment \p Alignment and
814 /// address space \p AddrSpace.
815 bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
816 Align Alignment, unsigned AddrSpace) const;
817
818 // Return true if the target supports masked vector histograms.
819 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const;
820
821 /// Return true if this is an alternating opcode pattern that can be lowered
822 /// to a single instruction on the target. In X86 this is for the addsub
823 /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
824 /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
825 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
826 /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
827 /// \p VecTy is the vector type of the instruction to be generated.
828 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
829 const SmallBitVector &OpcodeMask) const;
830
831 /// Return true if we should be enabling ordered reductions for the target.
832 bool enableOrderedReductions() const;
833
834 /// Return true if the target has a unified operation to calculate division
835 /// and remainder. If so, the additional implicit multiplication and
836 /// subtraction required to calculate a remainder from division are free. This
837 /// can enable more aggressive transformations for division and remainder than
838 /// would typically be allowed using throughput or size cost models.
839 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
840
841 /// Return true if the given instruction (assumed to be a memory access
842 /// instruction) has a volatile variant. If that's the case then we can avoid
843 /// addrspacecast to generic AS for volatile loads/stores. Default
844 /// implementation returns false, which prevents address space inference for
845 /// volatile loads/stores.
846 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
847
848 /// Return true if target doesn't mind addresses in vectors.
849 bool prefersVectorizedAddressing() const;
850
851 /// Return the cost of the scaling factor used in the addressing
852 /// mode represented by AM for this target, for a load/store
853 /// of the specified type.
854 /// If the AM is supported, the return value must be >= 0.
855 /// If the AM is not supported, it returns a negative value.
856 /// TODO: Handle pre/postinc as well.
857 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
858 StackOffset BaseOffset, bool HasBaseReg,
859 int64_t Scale,
860 unsigned AddrSpace = 0) const;
861
862 /// Return true if the loop strength reduce pass should make
863 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
864 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
865 /// immediate offset and no index register.
866 bool LSRWithInstrQueries() const;
867
868 /// Return true if it's free to truncate a value of type Ty1 to type
869 /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
870 /// by referencing its sub-register AX.
871 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
872
873 /// Return true if it is profitable to hoist an instruction in the
874 /// then/else blocks to before the if.
875 bool isProfitableToHoist(Instruction *I) const;
876
877 bool useAA() const;
878
879 /// Return true if this type is legal.
880 bool isTypeLegal(Type *Ty) const;
881
882 /// Returns the estimated number of registers required to represent \p Ty.
883 unsigned getRegUsageForType(Type *Ty) const;
884
885 /// Return true if switches should be turned into lookup tables for the
886 /// target.
887 bool shouldBuildLookupTables() const;
888
889 /// Return true if switches should be turned into lookup tables
890 /// containing this constant value for the target.
891 bool shouldBuildLookupTablesForConstant(Constant *C) const;
892
893 /// Return true if lookup tables should be turned into relative lookup tables.
894 bool shouldBuildRelLookupTables() const;
895
896 /// Return true if the input function which is cold at all call sites,
897 /// should use coldcc calling convention.
898 bool useColdCCForColdCall(Function &F) const;
899
901
902 /// Identifies if the vector form of the intrinsic has a scalar operand.
903 bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
904 unsigned ScalarOpdIdx) const;
905
906 /// Identifies if the vector form of the intrinsic is overloaded on the type
907 /// of the operand at index \p OpdIdx, or on the return type if \p OpdIdx is
908 /// -1.
909 bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
910 int OpdIdx) const;
911
912 /// Identifies if the vector form of the intrinsic that returns a struct is
913 /// overloaded at the struct element index \p RetIdx.
914 bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
915 int RetIdx) const;
916
917 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
918 /// are set if the demanded result elements need to be inserted and/or
919 /// extracted from vectors. The involved values may be passed in VL if
920 /// Insert is true.
921 InstructionCost getScalarizationOverhead(VectorType *Ty,
922 const APInt &DemandedElts,
923 bool Insert, bool Extract,
924 TTI::TargetCostKind CostKind,
925 ArrayRef<Value *> VL = {}) const;
926
927 /// Estimate the overhead of scalarizing an instruction's unique
928 /// non-constant operands. The (potentially vector) types to use for each of
929 /// the arguments are passed via Tys.
930 InstructionCost
931 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
932 ArrayRef<Type *> Tys,
933 TTI::TargetCostKind CostKind) const;
934
935 /// If target has efficient vector element load/store instructions, it can
936 /// return true here so that insertion/extraction costs are not added to
937 /// the scalarization cost of a load/store.
938 bool supportsEfficientVectorElementLoadStore() const;
939
940 /// If the target supports tail calls.
941 bool supportsTailCalls() const;
942
943 /// If target supports tail call on \p CB
944 bool supportsTailCallFor(const CallBase *CB) const;
945
946 /// Don't restrict interleaved unrolling to small loops.
947 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
948
949 /// Returns options for expansion of memcmp. IsZeroCmp is
950 // true if this is the expansion of memcmp(p1, p2, s) == 0.
951 struct MemCmpExpansionOptions {
952 // Return true if memcmp expansion is enabled.
953 operator bool() const { return MaxNumLoads > 0; }
954
955 // Maximum number of load operations.
956 unsigned MaxNumLoads = 0;
957
958 // The list of available load sizes (in bytes), sorted in decreasing order.
959 SmallVector<unsigned, 8> LoadSizes;
960
961 // For memcmp expansion when the memcmp result is only compared equal or
962 // not-equal to 0, allow up to this number of load pairs per block. As an
963 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
964 // a0 = load2bytes &a[0]
965 // b0 = load2bytes &b[0]
966 // a2 = load1byte &a[2]
967 // b2 = load1byte &b[2]
968 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
969 unsigned NumLoadsPerBlock = 1;
970
971 // Set to true to allow overlapping loads. For example, 7-byte compares can
972 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
973 // requires all loads in LoadSizes to be doable in an unaligned way.
974 bool AllowOverlappingLoads = false;
975
976 // Sometimes, the amount of data that needs to be compared is smaller than
977 // the standard register size, but it cannot be loaded with just one load
978 // instruction. For example, if the size of the memory comparison is 6
979 // bytes, we can handle it more efficiently by loading all 6 bytes in a
980 // single block and generating an 8-byte number, instead of generating two
981 // separate blocks with conditional jumps for 4 and 2 byte loads. This
982 // approach simplifies the process and produces the comparison result as
983 // normal. This array lists the allowed sizes of memcmp tails that can be
984 // merged into one block
985 SmallVector<unsigned, 4> AllowedTailExpansions;
986 };
987 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
988 bool IsZeroCmp) const;
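// Editorial example (not part of the original header): with AllowOverlappingLoads
// set, an equality-only 7-byte memcmp can be expanded with two overlapping
// 4-byte loads per operand:
//   memcmp(p, q, 7) == 0
//     ==>  ((load4(p) ^ load4(q)) | (load4(p + 3) ^ load4(q + 3))) == 0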
989
990 /// Should the Select Optimization pass be enabled and run.
991 bool enableSelectOptimize() const;
992
993 /// Should the Select Optimization pass treat the given instruction like a
994 /// select, potentially converting it to a conditional branch. This can
995 /// include select-like instructions like or(zext(c), x) that can be converted
996 /// to selects.
997 bool shouldTreatInstructionLikeSelect(const Instruction *I) const;
998
999 /// Enable matching of interleaved access groups.
1000 bool enableInterleavedAccessVectorization() const;
1001
1002 /// Enable matching of interleaved access groups that contain predicated
1003 /// accesses or gaps and therefore vectorized using masked
1004 /// vector loads/stores.
1005 bool enableMaskedInterleavedAccessVectorization() const;
1006
1007 /// Indicate that it is potentially unsafe to automatically vectorize
1008 /// floating-point operations because the semantics of vector and scalar
1009 /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
1010 /// does not support IEEE-754 denormal numbers, while depending on the
1011 /// platform, scalar floating-point math does.
1012 /// This applies to floating-point math operations and calls, not memory
1013 /// operations, shuffles, or casts.
1014 bool isFPVectorizationPotentiallyUnsafe() const;
1015
1016 /// Determine if the target supports unaligned memory accesses.
1017 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
1018 unsigned AddressSpace = 0,
1019 Align Alignment = Align(1),
1020 unsigned *Fast = nullptr) const;
1021
1022 /// Return hardware support for population count.
1023 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
1024
1025 /// Return true if the hardware has a fast square-root instruction.
1026 bool haveFastSqrt(Type *Ty) const;
1027
1028 /// Return true if the cost of the instruction is too high to speculatively
1029 /// execute and should be kept behind a branch.
1030 /// This normally just wraps around a getInstructionCost() call, but some
1031 /// targets might report a low TCK_SizeAndLatency value that is incompatible
1032 /// with the fixed TCC_Expensive value.
1033 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
1034 bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;
1035
1036 /// Return true if it is faster to check if a floating-point value is NaN
1037 /// (or not-NaN) versus a comparison against a constant FP zero value.
1038 /// Targets should override this if materializing a 0.0 for comparison is
1039 /// generally as cheap as checking for ordered/unordered.
1040 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
1041
1042 /// Return the expected cost of supporting the floating point operation
1043 /// of the specified type.
1044 InstructionCost getFPOpCost(Type *Ty) const;
1045
1046 /// Return the expected cost of materializing for the given integer
1047 /// immediate of the specified type.
1048 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1049 TargetCostKind CostKind) const;
1050
1051 /// Return the expected cost of materialization for the given integer
1052 /// immediate of the specified type for a given instruction. The cost can be
1053 /// zero if the immediate can be folded into the specified instruction.
1054 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1055 const APInt &Imm, Type *Ty,
1056 TargetCostKind CostKind,
1057 Instruction *Inst = nullptr) const;
1058 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1059 const APInt &Imm, Type *Ty,
1060 TargetCostKind CostKind) const;
1061
1062 /// Return the expected cost for the given integer when optimising
1063 /// for size. This is different than the other integer immediate cost
1064 /// functions in that it is subtarget agnostic. This is useful when you e.g.
1065 /// target one ISA such as AArch32 but smaller encodings could be possible
1066 /// with another such as Thumb. This return value is used as a penalty when
1067 /// the total costs for a constant is calculated (the bigger the cost, the
1068 /// more beneficial constant hoisting is).
1069 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1070 const APInt &Imm, Type *Ty) const;
1071
1072 /// It can be advantageous to detach complex constants from their uses to make
1073 /// their generation cheaper. This hook allows targets to report when such
1074 /// transformations might negatively affect the code generation of the
1075 /// underlying operation. The motivating example is divides whereby hoisting
1076 /// constants prevents the code generator's ability to transform them into
1077 /// combinations of simpler operations.
1078 bool preferToKeepConstantsAttached(const Instruction &Inst,
1079 const Function &Fn) const;
1080
1081 /// @}
1082
1083 /// \name Vector Target Information
1084 /// @{
1085
1086 /// The various kinds of shuffle patterns for vector queries.
1087 enum ShuffleKind {
1088 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1089 SK_Reverse, ///< Reverse the order of the vector.
1090 SK_Select, ///< Selects elements from the corresponding lane of
1091 ///< either source operand. This is equivalent to a
1092 ///< vector select with a constant condition operand.
1093 SK_Transpose, ///< Transpose two vectors.
1094 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1095 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1096 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1097 ///< with any shuffle mask.
1098 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1099 ///< shuffle mask.
1100 SK_Splice ///< Concatenates elements from the first input vector
1101 ///< with elements of the second input vector. Returning
1102 ///< a vector of the same type as the input vectors.
1103 ///< Index indicates start offset in first input vector.
1104 };
1105
1106 /// Additional information about an operand's possible values.
1107 enum OperandValueKind {
1108 OK_AnyValue, // Operand can have any value.
1109 OK_UniformValue, // Operand is uniform (splat of a value).
1110 OK_UniformConstantValue, // Operand is uniform constant.
1111 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1112 };
1113
1114 /// Additional properties of an operand's values.
1115 enum OperandValueProperties {
1116 OP_None = 0,
1117 OP_PowerOf2 = 1,
1118 OP_NegatedPowerOf2 = 2,
1119 };
1120
1121 // Describe the values an operand can take. We're in the process
1122 // of migrating uses of OperandValueKind and OperandValueProperties
1123 // to use this class, and then will change the internal representation.
1124 struct OperandValueInfo {
1125 OperandValueKind Kind = OK_AnyValue;
1126 OperandValueProperties Properties = OP_None;
1127
1128 bool isConstant() const {
1129 return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
1130 }
1131 bool isUniform() const {
1132 return Kind == OK_UniformConstantValue || Kind == OK_UniformValue;
1133 }
1134 bool isPowerOf2() const {
1135 return Properties == OP_PowerOf2;
1136 }
1137 bool isNegatedPowerOf2() const {
1138 return Properties == OP_NegatedPowerOf2;
1139 }
1140
1141 OperandValueInfo getNoProps() const {
1142 return {Kind, OP_None};
1143 }
1144 };
1145
1146 /// \return the number of registers in the target-provided register class.
1147 unsigned getNumberOfRegisters(unsigned ClassID) const;
1148
1149 /// \return true if the target supports load/store that enables fault
1150 /// suppression of memory operands when the source condition is false.
1151 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const;
1152
1153 /// \return the target-provided register class ID for the provided type,
1154 /// accounting for type promotion and other type-legalization techniques that
1155 /// the target might apply. However, it specifically does not account for the
1156 /// scalarization or splitting of vector types. Should a vector type require
1157 /// scalarization or splitting into multiple underlying vector registers, that
1158 /// type should be mapped to a register class containing no registers.
1159 /// Specifically, this is designed to provide a simple, high-level view of the
1160 /// register allocation later performed by the backend. These register classes
1161 /// don't necessarily map onto the register classes used by the backend.
1162 /// FIXME: It's not currently possible to determine how many registers
1163 /// are used by the provided type.
1164 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1165
1166 /// \return the target-provided register class name
1167 const char *getRegisterClassName(unsigned ClassID) const;
1168
1169 enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
1170
1171 /// \return The width of the largest scalar or vector register type.
1172 TypeSize getRegisterBitWidth(RegisterKind K) const;
1173
1174 /// \return The width of the smallest vector register type.
1175 unsigned getMinVectorRegisterBitWidth() const;
1176
1177 /// \return The maximum value of vscale if the target specifies an
1178 /// architectural maximum vector length, and std::nullopt otherwise.
1179 std::optional<unsigned> getMaxVScale() const;
1180
1181 /// \return the value of vscale to tune the cost model for.
1182 std::optional<unsigned> getVScaleForTuning() const;
1183
1184 /// \return true if vscale is known to be a power of 2
1185 bool isVScaleKnownToBeAPowerOfTwo() const;
1186
1187 /// \return True if the vectorization factor should be chosen to
1188 /// make the vector of the smallest element type match the size of a
1189 /// vector register. For wider element types, this could result in
1190 /// creating vectors that span multiple vector registers.
1191 /// If false, the vectorization factor will be chosen based on the
1192 /// size of the widest element type.
1193 /// \p K Register Kind for vectorization.
1194 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
1195
1196 /// \return The minimum vectorization factor for types of given element
1197 /// bit width, or 0 if there is no minimum VF. The returned value only
1198 /// applies when shouldMaximizeVectorBandwidth returns true.
1199 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1200 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1201
1202 /// \return The maximum vectorization factor for types of given element
1203 /// bit width and opcode, or 0 if there is no maximum VF.
1204 /// Currently only used by the SLP vectorizer.
1205 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1206
1207 /// \return The minimum vectorization factor for the store instruction. Given
1208 /// the initial estimation of the minimum vector factor and store value type,
1209 /// it tries to find possible lowest VF, which still might be profitable for
1210 /// the vectorization.
1211 /// \param VF Initial estimation of the minimum vector factor.
1212 /// \param ScalarMemTy Scalar memory type of the store operation.
1213 /// \param ScalarValTy Scalar type of the stored value.
1214 /// Currently only used by the SLP vectorizer.
1215 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1216 Type *ScalarValTy) const;
1217
1218 /// \return True if it should be considered for address type promotion.
1219 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1220 /// profitable without finding other extensions fed by the same input.
1221 bool shouldConsiderAddressTypePromotion(
1222 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1223
1224 /// \return The size of a cache line in bytes.
1225 unsigned getCacheLineSize() const;
1226
1227 /// The possible cache levels
1228 enum class CacheLevel {
1229 L1D, // The L1 data cache
1230 L2D, // The L2 data cache
1231
1232 // We currently do not model L3 caches, as their sizes differ widely between
1233 // microarchitectures. Also, we currently do not have a use for L3 cache
1234 // size modeling yet.
1235 };
1236
1237 /// \return The size of the cache level in bytes, if available.
1238 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1239
1240 /// \return The associativity of the cache level, if available.
1241 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1242
1243 /// \return The minimum architectural page size for the target.
1244 std::optional<unsigned> getMinPageSize() const;
1245
1246 /// \return How much before a load we should place the prefetch
1247 /// instruction. This is currently measured in number of
1248 /// instructions.
1249 unsigned getPrefetchDistance() const;
1250
1251 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1252 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1253 /// and the arguments provided are meant to serve as a basis for deciding this
1254 /// for a particular loop.
1255 ///
1256 /// \param NumMemAccesses Number of memory accesses in the loop.
1257 /// \param NumStridedMemAccesses Number of the memory accesses that
1258 /// ScalarEvolution could find a known stride
1259 /// for.
1260 /// \param NumPrefetches Number of software prefetches that will be
1261 /// emitted as determined by the addresses
1262 /// involved and the cache line size.
1263 /// \param HasCall True if the loop contains a call.
1264 ///
1265 /// \return This is the minimum stride in bytes where it makes sense to start
1266 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1267 /// stride.
1268 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1269 unsigned NumStridedMemAccesses,
1270 unsigned NumPrefetches, bool HasCall) const;
1271
1272 /// \return The maximum number of iterations to prefetch ahead. If
1273 /// the required number of iterations is more than this number, no
1274 /// prefetching is performed.
1275 unsigned getMaxPrefetchIterationsAhead() const;
1276
1277 /// \return True if prefetching should also be done for writes.
1278 bool enableWritePrefetching() const;
1279
1280 /// \return if target want to issue a prefetch in address space \p AS.
1281 bool shouldPrefetchAddressSpace(unsigned AS) const;
1282
1283 /// \return The maximum interleave factor that any transform should try to
1284 /// perform for this target. This number depends on the level of parallelism
1285 /// and the number of execution units in the CPU.
1286 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1287
1288 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1289 static OperandValueInfo getOperandInfo(const Value *V);
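// Editorial example (not part of the original header): operand info feeds the
// arithmetic cost queries below. Assumes I points at a binary Instruction.
//   TargetTransformInfo::OperandValueInfo Op2Info =
//       TargetTransformInfo::getOperandInfo(I->getOperand(1));
//   bool DivByPow2Const = I->getOpcode() == Instruction::UDiv &&
//                         Op2Info.isConstant() && Op2Info.isPowerOf2();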
1290
1291 /// This is an approximation of reciprocal throughput of a math/logic op.
1292 /// A higher cost indicates less expected throughput.
1293 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1294 /// clock cycles per instruction when the instructions are not part of a
1295 /// limiting dependency chain."
1296 /// Therefore, costs should be scaled to account for multiple execution units
1297 /// on the target that can process this type of instruction. For example, if
1298 /// there are 5 scalar integer units and 2 vector integer units that can
1299 /// calculate an 'add' in a single cycle, this model should indicate that the
1300 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1301 /// add instruction.
1302 /// \p Args is an optional argument which holds the instruction operands
1303 /// values so the TTI can analyze those values searching for special
1304 /// cases or optimizations based on those values.
1305 /// \p CxtI is the optional original context instruction, if one exists, to
1306 /// provide even more information.
1307 /// \p TLibInfo is used to search for platform specific vector library
1308 /// functions for instructions that might be converted to calls (e.g. frem).
1309 InstructionCost getArithmeticInstrCost(
1310 unsigned Opcode, Type *Ty,
1311 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1312 TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
1313 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1314 ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr,
1315 const TargetLibraryInfo *TLibInfo = nullptr) const;
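// Editorial example (not part of the original header): costing a <4 x i32> add
// at reciprocal-throughput granularity. Assumes a TargetTransformInfo reference
// TTI and an LLVMContext Ctx.
//   auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
//   InstructionCost AddCost = TTI.getArithmeticInstrCost(
//       Instruction::Add, VecTy, TargetTransformInfo::TCK_RecipThroughput);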
1316
1317 /// Returns the cost estimation for alternating opcode pattern that can be
1318 /// lowered to a single instruction on the target. In X86 this is for the
1319 /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in
1320 /// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
1321 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1322 /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
1323 /// \p VecTy is the vector type of the instruction to be generated.
1324 InstructionCost getAltInstrCost(
1325 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1326 const SmallBitVector &OpcodeMask,
1327 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1328
1329 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1330 /// The exact mask may be passed as Mask, or else the array will be empty.
1331 /// The index and subtype parameters are used by the subvector insertion and
1332 /// extraction shuffle kinds to show the insert/extract point and the type of
1333 /// the subvector being inserted/extracted. The operands of the shuffle can be
1334 /// passed through \p Args, which helps improve the cost estimation in some
1335 /// cases, like in broadcast loads.
1336 /// NOTE: For subvector extractions Tp represents the source type.
1337 InstructionCost
1338 getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = {},
1339 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1340 int Index = 0, VectorType *SubTp = nullptr,
1341 ArrayRef<const Value *> Args = {},
1342 const Instruction *CxtI = nullptr) const;
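// Editorial example (not part of the original header): costing a broadcast
// (splat) shuffle of an <8 x float> vector. Assumes TTI and Ctx as above.
//   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
//   InstructionCost SplatCost =
//       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);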
1343
1344 /// Represents a hint about the context in which a cast is used.
1345 ///
1346 /// For zext/sext, the context of the cast is the operand, which must be a
1347 /// load of some kind. For trunc, the context of the cast is the single
1348 /// user of the instruction, which must be a store of some kind.
1349 ///
1350 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1351 /// type of cast it's dealing with, as not every cast is equal. For instance,
1352 /// the zext of a load may be free, but the zext of an interleaved load can
1353 /// be (very) expensive!
1354 ///
1355 /// See \c getCastContextHint to compute a CastContextHint from a cast
1356 /// Instruction*. Callers can use it if they don't need to override the
1357 /// context and just want it to be calculated from the instruction.
1358 ///
1359 /// FIXME: This handles the types of load/store that the vectorizer can
1360 /// produce, which are the cases where the context instruction is most
1361 /// likely to be incorrect. There are other situations where that can happen
1362 /// too, which might be handled here but in the long run a more general
1363 /// solution of costing multiple instructions at the same time may be better.
1364 enum class CastContextHint : uint8_t {
1365 None, ///< The cast is not used with a load/store of any kind.
1366 Normal, ///< The cast is used with a normal load/store.
1367 Masked, ///< The cast is used with a masked load/store.
1368 GatherScatter, ///< The cast is used with a gather/scatter.
1369 Interleave, ///< The cast is used with an interleaved load/store.
1370 Reversed, ///< The cast is used with a reversed load/store.
1371 };
1372
1373 /// Calculates a CastContextHint from \p I.
1374 /// This should be used by callers of getCastInstrCost if they wish to
1375 /// determine the context from some instruction.
1376 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1377 /// or if it's another type of cast.
1378 static CastContextHint getCastContextHint(const Instruction *I);
1379
1380 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1381 /// zext, etc. If there is an existing instruction that holds Opcode, it
1382 /// may be passed in the 'I' parameter.
1383 InstructionCost
1384 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1385 TTI::CastContextHint CCH,
1386 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1387 const Instruction *I = nullptr) const;
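// A minimal usage sketch (hypothetical caller): a zext whose source is known
// to come from a normal (consecutive, unmasked) load.
//
//   auto *V4I8 = FixedVectorType::get(Type::getInt8Ty(Ctx), 4);
//   auto *V4I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
//   InstructionCost Cost = TTI.getCastInstrCost(
//       Instruction::ZExt, /*Dst=*/V4I32, /*Src=*/V4I8,
//       TargetTransformInfo::CastContextHint::Normal);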
1388
1389 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1390 /// Index = -1 to indicate that there is no information about the index value.
1391 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1392 VectorType *VecTy,
1393 unsigned Index) const;
1394
1395 /// \return The expected cost of control-flow related instructions such as
1396 /// Phi, Ret, Br, Switch.
1397 InstructionCost
1398 getCFInstrCost(unsigned Opcode,
1399 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1400 const Instruction *I = nullptr) const;
1401
1402 /// \returns The expected cost of compare and select instructions. If there
1403 /// is an existing instruction that holds Opcode, it may be passed in the
1404 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1405 /// is using a compare with the specified predicate as condition. When vector
1406 /// types are passed, \p VecPred must be used for all lanes. For a
1407 /// comparison, the two operands are the natural values. For a select, the
1408 /// two operands are the *value* operands, not the condition operand.
1409 InstructionCost
1410 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1411 CmpInst::Predicate VecPred,
1412 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1413 OperandValueInfo Op1Info = {OK_AnyValue, OP_None},
1414 OperandValueInfo Op2Info = {OK_AnyValue, OP_None},
1415 const Instruction *I = nullptr) const;
1416
1417 /// \return The expected cost of vector Insert and Extract.
1418 /// Use -1 to indicate that there is no information on the index value.
1419 /// This is used when the instruction is not available; a typical use
1420 /// case is to provision the cost of vectorization/scalarization in
1421 /// vectorizer passes.
1422 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1423 TTI::TargetCostKind CostKind,
1424 unsigned Index = -1, Value *Op0 = nullptr,
1425 Value *Op1 = nullptr) const;
1426
1427 /// \return The expected cost of vector Insert and Extract.
1428 /// Use -1 to indicate that there is no information on the index value.
1429 /// This is used when the instruction is not available; a typical use
1430 /// case is to provision the cost of vectorization/scalarization in
1431 /// vectorizer passes.
1432 /// \param ScalarUserAndIdx encodes the information about extracts from a
1433 /// vector with 'Scalar' being the value being extracted,'User' being the user
1434 /// of the extract(nullptr if user is not known before vectorization) and
1435 /// 'Idx' being the extract lane.
1436 InstructionCost getVectorInstrCost(
1437 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
1438 Value *Scalar,
1439 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const;
1440
1441 /// \return The expected cost of vector Insert and Extract.
1442 /// This is used when instruction is available, and implementation
1443 /// asserts 'I' is not nullptr.
1444 ///
1445 /// A typical suitable use case is cost estimation when vector instruction
1446 /// exists (e.g., from basic blocks during transformation).
1447 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1448 TTI::TargetCostKind CostKind,
1449 unsigned Index = -1) const;
1450
1451 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1452 /// \p ReplicationFactor times.
1453 ///
1454 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1455 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1456 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1457 int VF,
1458 const APInt &DemandedDstElts,
1459 TTI::TargetCostKind CostKind) const;
1460
1461 /// \return The cost of Load and Store instructions.
1462 InstructionCost
1463 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1464 unsigned AddressSpace,
1465 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1466 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1467 const Instruction *I = nullptr) const;
1468
1469 /// \return The cost of VP Load and Store instructions.
1470 InstructionCost
1471 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1472 unsigned AddressSpace,
1473 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1474 const Instruction *I = nullptr) const;
1475
1476 /// \return The cost of masked Load and Store instructions.
1477 InstructionCost getMaskedMemoryOpCost(
1478 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1479 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1480
1481 /// \return The cost of Gather or Scatter operation
1482 /// \p Opcode - is a type of memory access Load or Store
1483 /// \p DataTy - a vector type of the data to be loaded or stored
1484 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1485 /// \p VariableMask - true when the memory access is predicated with a mask
1486 /// that is not a compile-time constant
1487 /// \p Alignment - alignment of single element
1488 /// \p I - the optional original context instruction, if one exists, e.g. the
1489 /// load/store to transform or the call to the gather/scatter intrinsic
1490 InstructionCost getGatherScatterOpCost(
1491 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1492 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1493 const Instruction *I = nullptr) const;
1494
1495 /// \return The cost of strided memory operations.
1496 /// \p Opcode - is a type of memory access Load or Store
1497 /// \p DataTy - a vector type of the data to be loaded or stored
1498 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1499 /// \p VariableMask - true when the memory access is predicated with a mask
1500 /// that is not a compile-time constant
1501 /// \p Alignment - alignment of single element
1502 /// \p I - the optional original context instruction, if one exists, e.g. the
1503 /// load/store to transform or the call to the gather/scatter intrinsic
1504 InstructionCost getStridedMemoryOpCost(
1505 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1506 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1507 const Instruction *I = nullptr) const;
1508
1509 /// \return The cost of the interleaved memory operation.
1510 /// \p Opcode is the memory operation code
1511 /// \p VecTy is the vector type of the interleaved access.
1512 /// \p Factor is the interleave factor
1513 /// \p Indices is the indices for interleaved load members (as interleaved
1514 /// load allows gaps)
1515 /// \p Alignment is the alignment of the memory operation
1516 /// \p AddressSpace is address space of the pointer.
1517 /// \p UseMaskForCond indicates if the memory access is predicated.
1518 /// \p UseMaskForGaps indicates if gaps should be masked.
1519 InstructionCost getInterleavedMemoryOpCost(
1520 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1521 Align Alignment, unsigned AddressSpace,
1522 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1523 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
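// A minimal usage sketch (hypothetical caller): an interleaved load of
// factor 2 over an <8 x i32> block where both members are used, address
// space 0, 16-byte aligned.
//
//   auto *V8I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);
//   unsigned Indices[] = {0, 1};
//   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
//       Instruction::Load, V8I32, /*Factor=*/2, Indices, Align(16),
//       /*AddressSpace=*/0);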
1524
1525 /// A helper function to determine the type of reduction algorithm used
1526 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1527 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1528 return FMF && !(*FMF).allowReassoc();
1529 }
1530
1531 /// Calculate the cost of vector reduction intrinsics.
1532 ///
1533 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1534 /// value using the operation denoted by \p Opcode. The FastMathFlags
1535 /// parameter \p FMF indicates what type of reduction we are performing:
1536 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1537 /// involves successively splitting a vector into half and doing the
1538 /// operation on the pair of halves until you have a scalar value. For
1539 /// example:
1540 /// (v0, v1, v2, v3)
1541 /// ((v0+v2), (v1+v3), undef, undef)
1542 /// ((v0+v2+v1+v3), undef, undef, undef)
1543 /// This is the default behaviour for integer operations, whereas for
1544 /// floating point we only do this if \p FMF indicates that
1545 /// reassociation is allowed.
1546 /// 2. Ordered. For a vector with N elements this involves performing N
1547 /// operations in lane order, starting with an initial scalar value, i.e.
1548 /// result = InitVal + v0
1549 /// result = result + v1
1550 /// result = result + v2
1551 /// result = result + v3
1552 /// This is only the case for FP operations and when reassociation is not
1553 /// allowed.
1554 ///
1555 InstructionCost getArithmeticReductionCost(
1556 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1557 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1558
1559 InstructionCost getMinMaxReductionCost(
1560 Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
1561 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1562
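// A minimal usage sketch (hypothetical caller, V4F32 assumed to be a
// <4 x float> type): the same fadd reduction costed for the two schemes
// described above.
//
//   FastMathFlags Reassoc;
//   Reassoc.setAllowReassoc();
//   InstructionCost TreeCost = TTI.getArithmeticReductionCost(
//       Instruction::FAdd, V4F32, Reassoc);          // tree-wise reduction
//   InstructionCost OrderedCost = TTI.getArithmeticReductionCost(
//       Instruction::FAdd, V4F32, FastMathFlags());  // ordered reduction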
1563 /// Calculate the cost of an extended reduction pattern, similar to
1564 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1565 /// extensions. This is the cost of an expression such as:
1566 /// ResTy vecreduce.add(mul (A, B)).
1567 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1568 InstructionCost getMulAccReductionCost(
1569 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1570 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1571
1572 /// Calculate the cost of an extended reduction pattern, similar to
1573 /// getArithmeticReductionCost of a reduction with an extension.
1574 /// This is the cost of an expression such as:
1575 /// ResTy vecreduce.opcode(ext(Ty A)).
1576 InstructionCost getExtendedReductionCost(
1577 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1578 FastMathFlags FMF,
1579 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1580
1581 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1582 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1583 /// 3. scalar instruction which is to be vectorized.
1584 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1585 TTI::TargetCostKind CostKind) const;
1586
1587 /// \returns The cost of Call instructions.
1588 InstructionCost getCallInstrCost(
1589 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1590 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1591
1592 /// \returns The number of pieces into which the provided type must be
1593 /// split during legalization. Zero is returned when the answer is unknown.
1594 unsigned getNumberOfParts(Type *Tp) const;
1595
1596 /// \returns The cost of the address computation. For most targets this can be
1597 /// merged into the instruction indexing mode. Some targets might want to
1598 /// distinguish between address computation for memory operations on vector
1599 /// types and scalar types. Such targets should override this function.
1600 /// The 'SE' parameter holds a pointer to the scalar evolution object which
1601 /// is used to get the step value of Ptr in the case of a constant stride.
1602 /// The 'Ptr' parameter holds the SCEV of the access pointer.
1603 InstructionCost getAddressComputationCost(Type *Ty,
1604 ScalarEvolution *SE = nullptr,
1605 const SCEV *Ptr = nullptr) const;
1606
1607 /// \returns The cost, if any, of keeping values of the given types alive
1608 /// over a callsite.
1609 ///
1610 /// Some types may require the use of register classes that do not have
1611 /// any callee-saved registers, so would require a spill and fill.
1612 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1613
1614 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1615 /// will contain additional information: whether the intrinsic may read or
1616 /// write memory, its volatility, and the pointer. Info is undefined
1617 /// if false is returned.
1618 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1619
1620 /// \returns The maximum element size, in bytes, for an element
1621 /// unordered-atomic memory intrinsic.
1622 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1623
1624 /// \returns A value which is the result of the given memory intrinsic. New
1625 /// instructions may be created to extract the result from the given intrinsic
1626 /// memory operation. Returns nullptr if the target cannot create a result
1627 /// from the given intrinsic.
1628 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1629 Type *ExpectedType) const;
1630
1631 /// \returns The type to use in a loop expansion of a memcpy call.
1632 Type *getMemcpyLoopLoweringType(
1633 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1634 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
1635 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1636
1637 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1638 /// \param RemainingBytes The number of bytes to copy.
1639 ///
1640 /// Calculates the operand types to use when copying \p RemainingBytes of
1641 /// memory, where source and destination alignments are \p SrcAlign and
1642 /// \p DestAlign respectively.
1643 void getMemcpyLoopResidualLoweringType(
1644 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1645 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1646 Align SrcAlign, Align DestAlign,
1647 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1648
1649 /// \returns True if the two functions have compatible attributes for inlining
1650 /// purposes.
1651 bool areInlineCompatible(const Function *Caller,
1652 const Function *Callee) const;
1653
1654 /// Returns a penalty for invoking call \p Call in \p F.
1655 /// For example, if a function F calls a function G, which in turn calls
1656 /// function H, then getInlineCallPenalty(F, H()) would return the
1657 /// penalty of calling H from F, e.g. after inlining G into F.
1658 /// \p DefaultCallPenalty is passed to give a default penalty that
1659 /// the target can amend or override.
1660 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1661 unsigned DefaultCallPenalty) const;
1662
1663 /// \returns True if the caller and callee agree on how \p Types will be
1664 /// passed to the callee or returned
1665 /// from the callee.
1666 /// \param Types List of types to check.
1667 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1668 const ArrayRef<Type *> &Types) const;
1669
1670 /// The type of load/store indexing.
1671 enum MemIndexedMode {
1672 MIM_Unindexed, ///< No indexing.
1673 MIM_PreInc, ///< Pre-incrementing.
1674 MIM_PreDec, ///< Pre-decrementing.
1675 MIM_PostInc, ///< Post-incrementing.
1676 MIM_PostDec ///< Post-decrementing.
1677 };
1678
1679 /// \returns True if the specified indexed load for the given type is legal.
1680 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1681
1682 /// \returns True if the specified indexed store for the given type is legal.
1683 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1684
1685 /// \returns The bitwidth of the largest vector type that should be used to
1686 /// load/store in the given address space.
1687 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1688
1689 /// \returns True if the load instruction is legal to vectorize.
1690 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1691
1692 /// \returns True if the store instruction is legal to vectorize.
1693 bool isLegalToVectorizeStore(StoreInst *SI) const;
1694
1695 /// \returns True if it is legal to vectorize the given load chain.
1696 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1697 unsigned AddrSpace) const;
1698
1699 /// \returns True if it is legal to vectorize the given store chain.
1700 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1701 unsigned AddrSpace) const;
1702
1703 /// \returns True if it is legal to vectorize the given reduction kind.
1704 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1705 ElementCount VF) const;
1706
1707 /// \returns True if the given type is supported for scalable vectors
1708 bool isElementTypeLegalForScalableVector(Type *Ty) const;
1709
1710 /// \returns The new vector factor value if the target doesn't support \p
1711 /// SizeInBytes loads or has a better vector factor.
1712 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1713 unsigned ChainSizeInBytes,
1714 VectorType *VecTy) const;
1715
1716 /// \returns The new vector factor value if the target doesn't support \p
1717 /// SizeInBytes stores or has a better vector factor.
1718 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1719 unsigned ChainSizeInBytes,
1720 VectorType *VecTy) const;
1721
1722 /// Flags describing the kind of vector reduction.
1723 struct ReductionFlags {
1724 ReductionFlags() = default;
1725 bool IsMaxOp =
1726 false; ///< If the op is a min/max kind, true if it's a max operation.
1727 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1728 bool NoNaN =
1729 false; ///< If op is an fp min/max, whether NaNs may be present.
1730 };
1731
1732 /// \returns True if the target prefers fixed-width vectorization if the
1733 /// loop vectorizer's cost-model assigns an equal cost to the fixed and
1734 /// scalable version of the vectorized loop.
1735 bool preferFixedOverScalableIfEqualCost() const;
1736
1737 /// \returns True if the target prefers reductions in loop.
1738 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1739 ReductionFlags Flags) const;
1740
1741 /// \returns True if the target prefers the reduction select to be kept in the
1742 /// loop when tail folding, i.e.
1743 /// loop:
1744 /// p = phi (0, s)
1745 /// a = add (p, x)
1746 /// s = select (mask, a, p)
1747 /// vecreduce.add(s)
1748 ///
1749 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1750 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1751 /// by the target, this can lead to cleaner code generation.
1752 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1753 ReductionFlags Flags) const;
1754
1755 /// Return true if the loop vectorizer should consider vectorizing an
1756 /// otherwise scalar epilogue loop.
1757 bool preferEpilogueVectorization() const;
1758
1759 /// \returns True if the target wants to expand the given reduction intrinsic
1760 /// into a shuffle sequence.
1761 bool shouldExpandReduction(const IntrinsicInst *II) const;
1762
1763 enum struct ReductionShuffle { SplitHalf, Pairwise };
1764
1765 /// \returns The shuffle sequence pattern used to expand the given reduction
1766 /// intrinsic.
1767 ReductionShuffle
1768 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const;
1769
1770 /// \returns the size cost of rematerializing a GlobalValue address relative
1771 /// to a stack reload.
1772 unsigned getGISelRematGlobalCost() const;
1773
1774 /// \returns the lower bound of a trip count to decide on vectorization
1775 /// while tail-folding.
1776 unsigned getMinTripCountTailFoldingThreshold() const;
1777
1778 /// \returns True if the target supports scalable vectors.
1779 bool supportsScalableVectors() const;
1780
1781 /// \return true when scalable vectorization is preferred.
1782 bool enableScalableVectorization() const;
1783
1784 /// \name Vector Predication Information
1785 /// @{
1786 /// Whether the target supports the %evl parameter of VP intrinsics efficiently
1787 /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1788 /// Reference - "Vector Predication Intrinsics").
1789 /// Use of %evl is discouraged when that is not the case.
1790 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1791 Align Alignment) const;
1792
1793 /// Return true if sinking I's operands to the same basic block as I is
1794 /// profitable, e.g. because the operands can be folded into a target
1795 /// instruction during instruction selection. After calling the function
1796 /// \p Ops contains the Uses to sink ordered by dominance (dominating users
1797 /// come first).
1798 bool isProfitableToSinkOperands(Instruction *I,
1799 SmallVectorImpl<Use *> &Ops) const;
1800
1801 /// Return true if it's significantly cheaper to shift a vector by a uniform
1802 /// scalar than by an amount which will vary across each lane. On x86 before
1803 /// AVX2 for example, there is a "psllw" instruction for the former case, but
1804 /// no simple instruction for a general "a << b" operation on vectors.
1805 /// This should also apply to lowering for vector funnel shifts (rotates).
1806 bool isVectorShiftByScalarCheap(Type *Ty) const;
1807
1808 struct VPLegalization {
1809 enum VPTransform {
1810 // keep the predicating parameter
1811 Legal = 0,
1812 // where legal, discard the predicate parameter
1813 Discard = 1,
1814 // transform into something else that is also predicating
1815 Convert = 2
1816 };
1817
1818 // How to transform the EVL parameter.
1819 // Legal: keep the EVL parameter as it is.
1820 // Discard: Ignore the EVL parameter where it is safe to do so.
1821 // Convert: Fold the EVL into the mask parameter.
1822 VPTransform EVLParamStrategy;
1823
1824 // How to transform the operator.
1825 // Legal: The target supports this operator.
1826 // Convert: Convert this to a non-VP operation.
1827 // The 'Discard' strategy is invalid.
1828 VPTransform OpStrategy;
1829
1830 bool shouldDoNothing() const {
1831 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1832 }
1833 VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1834 : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1835 };
1836
1837 /// \returns How the target needs this vector-predicated operation to be
1838 /// transformed.
1839 VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1840 /// @}
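// A minimal usage sketch (hypothetical caller): deciding whether a given
// VPIntrinsic &VPI needs rewriting before instruction selection.
//
//   TargetTransformInfo::VPLegalization VPLeg =
//       TTI.getVPLegalizationStrategy(VPI);
//   if (VPLeg.shouldDoNothing())
//     return; // the target handles %evl and the VP opcode natively
//   // Otherwise fold %evl into the mask and/or expand to a non-VP form,
//   // as directed by VPLeg.EVLParamStrategy and VPLeg.OpStrategy.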
1841
1842 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1843 /// state.
1844 ///
1845 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1846 /// node containing a jump table in a format suitable for the target, so it
1847 /// needs to know what format of jump table it can legally use.
1848 ///
1849 /// For non-Arm targets, this function isn't used. It defaults to returning
1850 /// false, but it shouldn't matter what it returns anyway.
1851 bool hasArmWideBranch(bool Thumb) const;
1852
1853 /// \return The maximum number of function arguments the target supports.
1854 unsigned getMaxNumArgs() const;
1855
1856 /// \return For an array of the given Size, return the alignment boundary to
1857 /// pad to. Default is no padding.
1858 unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
1859
1860 /// @}
1861
1862private:
1863 /// The abstract base class used to type erase specific TTI
1864 /// implementations.
1865 class Concept;
1866
1867 /// The template model for the base class which wraps a concrete
1868 /// implementation in a type erased interface.
1869 template <typename T> class Model;
1870
1871 std::unique_ptr<Concept> TTIImpl;
1872};
1873
1874 class TargetTransformInfo::Concept {
1875 public:
1876 virtual ~Concept() = 0;
1877 virtual const DataLayout &getDataLayout() const = 0;
1878 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1880 Type *AccessType,
1882 virtual InstructionCost
1884 const TTI::PointersChainInfo &Info, Type *AccessTy,
1886 virtual unsigned getInliningThresholdMultiplier() const = 0;
1888 virtual unsigned
1890 virtual int getInliningLastCallToStaticBonus() const = 0;
1891 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1892 virtual int getInlinerVectorBonusPercent() const = 0;
1893 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1894 const AllocaInst *AI) const = 0;
1897 virtual unsigned
1899 ProfileSummaryInfo *PSI,
1900 BlockFrequencyInfo *BFI) = 0;
1906 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1907 virtual bool isSourceOfDivergence(const Value *V) = 0;
1908 virtual bool isAlwaysUniform(const Value *V) = 0;
1909 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1910 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1911 virtual unsigned getFlatAddressSpace() = 0;
1913 Intrinsic::ID IID) const = 0;
1914 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1915 virtual bool
1917 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1918 virtual bool isSingleThreaded() const = 0;
1919 virtual std::pair<const Value *, unsigned>
1920 getPredicatedAddrSpace(const Value *V) const = 0;
1922 Value *OldV,
1923 Value *NewV) const = 0;
1924 virtual bool isLoweredToCall(const Function *F) = 0;
1927 OptimizationRemarkEmitter *ORE) = 0;
1929 PeelingPreferences &PP) = 0;
1931 AssumptionCache &AC,
1932 TargetLibraryInfo *LibInfo,
1933 HardwareLoopInfo &HWLoopInfo) = 0;
1934 virtual unsigned getEpilogueVectorizationMinVF() = 0;
1935 virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0;
1936 virtual TailFoldingStyle
1937 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1938 virtual std::optional<Instruction *> instCombineIntrinsic(
1939 InstCombiner &IC, IntrinsicInst &II) = 0;
1940 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1941 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1942 KnownBits & Known, bool &KnownBitsComputed) = 0;
1943 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1944 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1945 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1946 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1947 SimplifyAndSetOp) = 0;
1948 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1949 virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
1950 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1951 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1952 int64_t BaseOffset, bool HasBaseReg,
1953 int64_t Scale, unsigned AddrSpace,
1954 Instruction *I,
1955 int64_t ScalableOffset) = 0;
1957 const TargetTransformInfo::LSRCost &C2) = 0;
1958 virtual bool isNumRegsMajorCostOfLSR() = 0;
1959 virtual bool shouldDropLSRSolutionIfLessProfitable() const = 0;
1960 virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1961 virtual bool canMacroFuseCmp() = 0;
1962 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1964 TargetLibraryInfo *LibInfo) = 0;
1965 virtual AddressingModeKind
1967 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1968 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1969 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1970 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1971 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1972 ElementCount NumElements) const = 0;
1973 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1974 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1976 Align Alignment) = 0;
1978 Align Alignment) = 0;
1979 virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) = 0;
1980 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) = 0;
1981 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0;
1982 virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
1983 Align Alignment,
1984 unsigned AddrSpace) = 0;
1985
1986 virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) = 0;
1987 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1988 unsigned Opcode1,
1989 const SmallBitVector &OpcodeMask) const = 0;
1990 virtual bool enableOrderedReductions() = 0;
1991 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1992 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1995 StackOffset BaseOffset,
1996 bool HasBaseReg, int64_t Scale,
1997 unsigned AddrSpace) = 0;
1998 virtual bool LSRWithInstrQueries() = 0;
1999 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
2000 virtual bool isProfitableToHoist(Instruction *I) = 0;
2001 virtual bool useAA() = 0;
2002 virtual bool isTypeLegal(Type *Ty) = 0;
2003 virtual unsigned getRegUsageForType(Type *Ty) = 0;
2004 virtual bool shouldBuildLookupTables() = 0;
2005 virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
2006 virtual bool shouldBuildRelLookupTables() = 0;
2007 virtual bool useColdCCForColdCall(Function &F) = 0;
2010 unsigned ScalarOpdIdx) = 0;
2012 int OpdIdx) = 0;
2013 virtual bool
2015 int RetIdx) = 0;
2016 virtual InstructionCost
2017 getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
2018 bool Insert, bool Extract, TargetCostKind CostKind,
2019 ArrayRef<Value *> VL = {}) = 0;
2020 virtual InstructionCost
2021 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2022 ArrayRef<Type *> Tys,
2023 TargetCostKind CostKind) = 0;
2024 virtual bool supportsEfficientVectorElementLoadStore() = 0;
2025 virtual bool supportsTailCalls() = 0;
2026 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
2027 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
2029 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
2030 virtual bool enableSelectOptimize() = 0;
2031 virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) = 0;
2032 virtual bool enableInterleavedAccessVectorization() = 0;
2033 virtual bool enableMaskedInterleavedAccessVectorization() = 0;
2034 virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
2035 virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
2036 unsigned BitWidth,
2037 unsigned AddressSpace,
2038 Align Alignment,
2039 unsigned *Fast) = 0;
2040 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
2041 virtual bool haveFastSqrt(Type *Ty) = 0;
2042 virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
2043 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
2044 virtual InstructionCost getFPOpCost(Type *Ty) = 0;
2045 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2046 const APInt &Imm, Type *Ty) = 0;
2047 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2049 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2050 const APInt &Imm, Type *Ty,
2052 Instruction *Inst = nullptr) = 0;
2054 const APInt &Imm, Type *Ty,
2057 const Function &Fn) const = 0;
2058 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
2059 virtual bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const = 0;
2060 virtual unsigned getRegisterClassForType(bool Vector,
2061 Type *Ty = nullptr) const = 0;
2062 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
2063 virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
2064 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
2065 virtual std::optional<unsigned> getMaxVScale() const = 0;
2066 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
2067 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
2068 virtual bool
2070 virtual ElementCount getMinimumVF(unsigned ElemWidth,
2071 bool IsScalable) const = 0;
2072 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
2073 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2074 Type *ScalarValTy) const = 0;
2076 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
2077 virtual unsigned getCacheLineSize() const = 0;
2078 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
2079 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
2080 const = 0;
2081 virtual std::optional<unsigned> getMinPageSize() const = 0;
2082
2083 /// \return How much before a load we should place the prefetch
2084 /// instruction. This is currently measured in number of
2085 /// instructions.
2086 virtual unsigned getPrefetchDistance() const = 0;
2087
2088 /// \return Some HW prefetchers can handle accesses up to a certain
2089 /// constant stride. This is the minimum stride in bytes where it
2090 /// makes sense to start adding SW prefetches. The default is 1,
2091 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
2092 /// even below the HW prefetcher limit, and the arguments provided are
2093 /// meant to serve as a basis for deciding this for a particular loop.
2094 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2095 unsigned NumStridedMemAccesses,
2096 unsigned NumPrefetches,
2097 bool HasCall) const = 0;
2098
2099 /// \return The maximum number of iterations to prefetch ahead. If
2100 /// the required number of iterations is more than this number, no
2101 /// prefetching is performed.
2102 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
2103
2104 /// \return True if prefetching should also be done for writes.
2105 virtual bool enableWritePrefetching() const = 0;
2106
2107 /// \return True if the target wants to issue a prefetch in address space \p AS.
2108 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
2109
2110 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
2112 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2113 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2114 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
2116 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2117 const SmallBitVector &OpcodeMask,
2119
2120 virtual InstructionCost
2123 ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
2124 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
2125 Type *Src, CastContextHint CCH,
2127 const Instruction *I) = 0;
2128 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2129 VectorType *VecTy,
2130 unsigned Index) = 0;
2131 virtual InstructionCost getCFInstrCost(unsigned Opcode,
2133 const Instruction *I = nullptr) = 0;
2134 virtual InstructionCost
2135 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2137 OperandValueInfo Op1Info, OperandValueInfo Op2Info,
2138 const Instruction *I) = 0;
2139 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2141 unsigned Index, Value *Op0,
2142 Value *Op1) = 0;
2143
2144 /// \param ScalarUserAndIdx encodes the information about extracts from a
2145 /// vector with 'Scalar' being the value being extracted,'User' being the user
2146 /// of the extract(nullptr if user is not known before vectorization) and
2147 /// 'Idx' being the extract lane.
2149 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
2150 Value *Scalar,
2151 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) = 0;
2152
2155 unsigned Index) = 0;
2156
2157 virtual InstructionCost
2158 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2159 const APInt &DemandedDstElts,
2161
2162 virtual InstructionCost
2163 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2165 OperandValueInfo OpInfo, const Instruction *I) = 0;
2166 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2167 Align Alignment,
2168 unsigned AddressSpace,
2170 const Instruction *I) = 0;
2171 virtual InstructionCost
2172 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2173 unsigned AddressSpace,
2175 virtual InstructionCost
2176 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2177 bool VariableMask, Align Alignment,
2179 const Instruction *I = nullptr) = 0;
2180 virtual InstructionCost
2181 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2182 bool VariableMask, Align Alignment,
2184 const Instruction *I = nullptr) = 0;
2185
2187 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2188 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2189 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2190 virtual InstructionCost
2192 std::optional<FastMathFlags> FMF,
2194 virtual InstructionCost
2198 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2199 FastMathFlags FMF,
2202 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2204 virtual InstructionCost
2208 ArrayRef<Type *> Tys,
2210 virtual unsigned getNumberOfParts(Type *Tp) = 0;
2211 virtual InstructionCost
2213 virtual InstructionCost
2216 MemIntrinsicInfo &Info) = 0;
2217 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2219 Type *ExpectedType) = 0;
2221 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2222 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
2223 std::optional<uint32_t> AtomicElementSize) const = 0;
2224
2226 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2227 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2228 Align SrcAlign, Align DestAlign,
2229 std::optional<uint32_t> AtomicCpySize) const = 0;
2230 virtual bool areInlineCompatible(const Function *Caller,
2231 const Function *Callee) const = 0;
2232 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2233 unsigned DefaultCallPenalty) const = 0;
2234 virtual bool areTypesABICompatible(const Function *Caller,
2235 const Function *Callee,
2236 const ArrayRef<Type *> &Types) const = 0;
2237 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2238 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2239 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2240 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2241 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2242 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2243 Align Alignment,
2244 unsigned AddrSpace) const = 0;
2245 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2246 Align Alignment,
2247 unsigned AddrSpace) const = 0;
2249 ElementCount VF) const = 0;
2250 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2251 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2252 unsigned ChainSizeInBytes,
2253 VectorType *VecTy) const = 0;
2254 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2255 unsigned ChainSizeInBytes,
2256 VectorType *VecTy) const = 0;
2257 virtual bool preferFixedOverScalableIfEqualCost() const = 0;
2258 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2259 ReductionFlags) const = 0;
2260 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2261 ReductionFlags) const = 0;
2262 virtual bool preferEpilogueVectorization() const = 0;
2263
2264 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2265 virtual ReductionShuffle
2267 virtual unsigned getGISelRematGlobalCost() const = 0;
2268 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2269 virtual bool enableScalableVectorization() const = 0;
2270 virtual bool supportsScalableVectors() const = 0;
2271 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2272 Align Alignment) const = 0;
2273 virtual bool
2275 SmallVectorImpl<Use *> &OpsToSink) const = 0;
2276
2277 virtual bool isVectorShiftByScalarCheap(Type *Ty) const = 0;
2278 virtual VPLegalization
2280 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2281 virtual unsigned getMaxNumArgs() const = 0;
2282 virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
2283 Type *ArrayType) const = 0;
2284};
2285
2286template <typename T>
2287class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2288 T Impl;
2289
2290public:
2291 Model(T Impl) : Impl(std::move(Impl)) {}
2292 ~Model() override = default;
2293
2294 const DataLayout &getDataLayout() const override {
2295 return Impl.getDataLayout();
2296 }
2297
2298 InstructionCost
2299 getGEPCost(Type *PointeeType, const Value *Ptr,
2300 ArrayRef<const Value *> Operands, Type *AccessType,
2302 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2303 }
2304 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2305 const Value *Base,
2306 const PointersChainInfo &Info,
2307 Type *AccessTy,
2308 TargetCostKind CostKind) override {
2309 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2310 }
2311 unsigned getInliningThresholdMultiplier() const override {
2312 return Impl.getInliningThresholdMultiplier();
2313 }
2314 unsigned adjustInliningThreshold(const CallBase *CB) override {
2315 return Impl.adjustInliningThreshold(CB);
2316 }
2317 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2318 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2319 }
2320 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2321 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2322 }
2323 int getInliningLastCallToStaticBonus() const override {
2324 return Impl.getInliningLastCallToStaticBonus();
2325 }
2326 int getInlinerVectorBonusPercent() const override {
2327 return Impl.getInlinerVectorBonusPercent();
2328 }
2329 unsigned getCallerAllocaCost(const CallBase *CB,
2330 const AllocaInst *AI) const override {
2331 return Impl.getCallerAllocaCost(CB, AI);
2332 }
2333 InstructionCost getMemcpyCost(const Instruction *I) override {
2334 return Impl.getMemcpyCost(I);
2335 }
2336
2337 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2338 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2339 }
2340
2341 InstructionCost getInstructionCost(const User *U,
2342 ArrayRef<const Value *> Operands,
2343 TargetCostKind CostKind) override {
2344 return Impl.getInstructionCost(U, Operands, CostKind);
2345 }
2346 BranchProbability getPredictableBranchThreshold() override {
2347 return Impl.getPredictableBranchThreshold();
2348 }
2349 InstructionCost getBranchMispredictPenalty() override {
2350 return Impl.getBranchMispredictPenalty();
2351 }
2352 bool hasBranchDivergence(const Function *F = nullptr) override {
2353 return Impl.hasBranchDivergence(F);
2354 }
2355 bool isSourceOfDivergence(const Value *V) override {
2356 return Impl.isSourceOfDivergence(V);
2357 }
2358
2359 bool isAlwaysUniform(const Value *V) override {
2360 return Impl.isAlwaysUniform(V);
2361 }
2362
2363 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2364 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2365 }
2366
2367 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2368 return Impl.addrspacesMayAlias(AS0, AS1);
2369 }
2370
2371 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2372
2373 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2374 Intrinsic::ID IID) const override {
2375 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2376 }
2377
2378 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2379 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2380 }
2381
2382 bool
2383 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2384 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2385 }
2386
2387 unsigned getAssumedAddrSpace(const Value *V) const override {
2388 return Impl.getAssumedAddrSpace(V);
2389 }
2390
2391 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2392
2393 std::pair<const Value *, unsigned>
2394 getPredicatedAddrSpace(const Value *V) const override {
2395 return Impl.getPredicatedAddrSpace(V);
2396 }
2397
2398 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2399 Value *NewV) const override {
2400 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2401 }
2402
2403 bool isLoweredToCall(const Function *F) override {
2404 return Impl.isLoweredToCall(F);
2405 }
2406 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2407 UnrollingPreferences &UP,
2408 OptimizationRemarkEmitter *ORE) override {
2409 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2410 }
2411 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2412 PeelingPreferences &PP) override {
2413 return Impl.getPeelingPreferences(L, SE, PP);
2414 }
2415 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2416 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2417 HardwareLoopInfo &HWLoopInfo) override {
2418 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2419 }
2420 unsigned getEpilogueVectorizationMinVF() override {
2421 return Impl.getEpilogueVectorizationMinVF();
2422 }
2423 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2424 return Impl.preferPredicateOverEpilogue(TFI);
2425 }
2427 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2428 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2429 }
2430 std::optional<Instruction *>
2431 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2432 return Impl.instCombineIntrinsic(IC, II);
2433 }
2434 std::optional<Value *>
2435 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2436 APInt DemandedMask, KnownBits &Known,
2437 bool &KnownBitsComputed) override {
2438 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2439 KnownBitsComputed);
2440 }
2441 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2442 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2443 APInt &UndefElts2, APInt &UndefElts3,
2444 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2445 SimplifyAndSetOp) override {
2446 return Impl.simplifyDemandedVectorEltsIntrinsic(
2447 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2448 SimplifyAndSetOp);
2449 }
2450 bool isLegalAddImmediate(int64_t Imm) override {
2451 return Impl.isLegalAddImmediate(Imm);
2452 }
2453 bool isLegalAddScalableImmediate(int64_t Imm) override {
2454 return Impl.isLegalAddScalableImmediate(Imm);
2455 }
2456 bool isLegalICmpImmediate(int64_t Imm) override {
2457 return Impl.isLegalICmpImmediate(Imm);
2458 }
2459 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2460 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2461 Instruction *I, int64_t ScalableOffset) override {
2462 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2463 AddrSpace, I, ScalableOffset);
2464 }
2465 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2466 const TargetTransformInfo::LSRCost &C2) override {
2467 return Impl.isLSRCostLess(C1, C2);
2468 }
2469 bool isNumRegsMajorCostOfLSR() override {
2470 return Impl.isNumRegsMajorCostOfLSR();
2471 }
2472 bool shouldDropLSRSolutionIfLessProfitable() const override {
2473 return Impl.shouldDropLSRSolutionIfLessProfitable();
2474 }
2475 bool isProfitableLSRChainElement(Instruction *I) override {
2476 return Impl.isProfitableLSRChainElement(I);
2477 }
2478 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2479 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2480 DominatorTree *DT, AssumptionCache *AC,
2481 TargetLibraryInfo *LibInfo) override {
2482 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2483 }
2485 getPreferredAddressingMode(const Loop *L,
2486 ScalarEvolution *SE) const override {
2487 return Impl.getPreferredAddressingMode(L, SE);
2488 }
2489 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2490 return Impl.isLegalMaskedStore(DataType, Alignment);
2491 }
2492 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2493 return Impl.isLegalMaskedLoad(DataType, Alignment);
2494 }
2495 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2496 return Impl.isLegalNTStore(DataType, Alignment);
2497 }
2498 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2499 return Impl.isLegalNTLoad(DataType, Alignment);
2500 }
2501 bool isLegalBroadcastLoad(Type *ElementTy,
2502 ElementCount NumElements) const override {
2503 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2504 }
2505 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2506 return Impl.isLegalMaskedScatter(DataType, Alignment);
2507 }
2508 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2509 return Impl.isLegalMaskedGather(DataType, Alignment);
2510 }
2511 bool forceScalarizeMaskedGather(VectorType *DataType,
2512 Align Alignment) override {
2513 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2514 }
2515 bool forceScalarizeMaskedScatter(VectorType *DataType,
2516 Align Alignment) override {
2517 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2518 }
2519 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
2520 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2521 }
2522 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
2523 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2524 }
2525 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
2526 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2527 }
2528 bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
2529 Align Alignment,
2530 unsigned AddrSpace) override {
2531 return Impl.isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace);
2532 }
2533 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
2534 return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
2535 }
2536 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2537 const SmallBitVector &OpcodeMask) const override {
2538 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2539 }
2540 bool enableOrderedReductions() override {
2541 return Impl.enableOrderedReductions();
2542 }
2543 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2544 return Impl.hasDivRemOp(DataType, IsSigned);
2545 }
2546 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2547 return Impl.hasVolatileVariant(I, AddrSpace);
2548 }
2549 bool prefersVectorizedAddressing() override {
2550 return Impl.prefersVectorizedAddressing();
2551 }
2552 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2553 StackOffset BaseOffset, bool HasBaseReg,
2554 int64_t Scale,
2555 unsigned AddrSpace) override {
2556 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2557 AddrSpace);
2558 }
2559 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2560 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2561 return Impl.isTruncateFree(Ty1, Ty2);
2562 }
2563 bool isProfitableToHoist(Instruction *I) override {
2564 return Impl.isProfitableToHoist(I);
2565 }
2566 bool useAA() override { return Impl.useAA(); }
2567 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2568 unsigned getRegUsageForType(Type *Ty) override {
2569 return Impl.getRegUsageForType(Ty);
2570 }
2571 bool shouldBuildLookupTables() override {
2572 return Impl.shouldBuildLookupTables();
2573 }
2574 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2575 return Impl.shouldBuildLookupTablesForConstant(C);
2576 }
2577 bool shouldBuildRelLookupTables() override {
2578 return Impl.shouldBuildRelLookupTables();
2579 }
2580 bool useColdCCForColdCall(Function &F) override {
2581 return Impl.useColdCCForColdCall(F);
2582 }
2583 bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override {
2584 return Impl.isTargetIntrinsicTriviallyScalarizable(ID);
2585 }
2586
2587 bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
2588 unsigned ScalarOpdIdx) override {
2589 return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
2590 }
2591
2592 bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
2593 int OpdIdx) override {
2594 return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
2595 }
2596
2597 bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
2598 int RetIdx) override {
2599 return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
2600 }
2601
2602 InstructionCost getScalarizationOverhead(VectorType *Ty,
2603 const APInt &DemandedElts,
2604 bool Insert, bool Extract,
2606 ArrayRef<Value *> VL = {}) override {
2607 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2608 CostKind, VL);
2609 }
2610 InstructionCost
2611 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2612 ArrayRef<Type *> Tys,
2613 TargetCostKind CostKind) override {
2614 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2615 }
2616
2617 bool supportsEfficientVectorElementLoadStore() override {
2618 return Impl.supportsEfficientVectorElementLoadStore();
2619 }
2620
2621 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2622 bool supportsTailCallFor(const CallBase *CB) override {
2623 return Impl.supportsTailCallFor(CB);
2624 }
2625
2626 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2627 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2628 }
2629 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2630 bool IsZeroCmp) const override {
2631 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2632 }
2633 bool enableSelectOptimize() override {
2634 return Impl.enableSelectOptimize();
2635 }
2636 bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
2637 return Impl.shouldTreatInstructionLikeSelect(I);
2638 }
2639 bool enableInterleavedAccessVectorization() override {
2640 return Impl.enableInterleavedAccessVectorization();
2641 }
2642 bool enableMaskedInterleavedAccessVectorization() override {
2643 return Impl.enableMaskedInterleavedAccessVectorization();
2644 }
2645 bool isFPVectorizationPotentiallyUnsafe() override {
2646 return Impl.isFPVectorizationPotentiallyUnsafe();
2647 }
2648 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2649 unsigned AddressSpace, Align Alignment,
2650 unsigned *Fast) override {
2651 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2652 Alignment, Fast);
2653 }
2654 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2655 return Impl.getPopcntSupport(IntTyWidthInBit);
2656 }
2657 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2658
2659 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2660 return Impl.isExpensiveToSpeculativelyExecute(I);
2661 }
2662
2663 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2664 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2665 }
2666
2667 InstructionCost getFPOpCost(Type *Ty) override {
2668 return Impl.getFPOpCost(Ty);
2669 }
2670
2671 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2672 const APInt &Imm, Type *Ty) override {
2673 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2674 }
2675 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2676 TargetCostKind CostKind) override {
2677 return Impl.getIntImmCost(Imm, Ty, CostKind);
2678 }
2679 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2680 const APInt &Imm, Type *Ty,
2682 Instruction *Inst = nullptr) override {
2683 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2684 }
2685 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2686 const APInt &Imm, Type *Ty,
2687 TargetCostKind CostKind) override {
2688 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2689 }
2690 bool preferToKeepConstantsAttached(const Instruction &Inst,
2691 const Function &Fn) const override {
2692 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2693 }
2694 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2695 return Impl.getNumberOfRegisters(ClassID);
2696 }
2697 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
2698 return Impl.hasConditionalLoadStoreForType(Ty);
2699 }
2700 unsigned getRegisterClassForType(bool Vector,
2701 Type *Ty = nullptr) const override {
2702 return Impl.getRegisterClassForType(Vector, Ty);
2703 }
2704 const char *getRegisterClassName(unsigned ClassID) const override {
2705 return Impl.getRegisterClassName(ClassID);
2706 }
2707 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2708 return Impl.getRegisterBitWidth(K);
2709 }
2710 unsigned getMinVectorRegisterBitWidth() const override {
2711 return Impl.getMinVectorRegisterBitWidth();
2712 }
2713 std::optional<unsigned> getMaxVScale() const override {
2714 return Impl.getMaxVScale();
2715 }
2716 std::optional<unsigned> getVScaleForTuning() const override {
2717 return Impl.getVScaleForTuning();
2718 }
2719 bool isVScaleKnownToBeAPowerOfTwo() const override {
2720 return Impl.isVScaleKnownToBeAPowerOfTwo();
2721 }
2722 bool shouldMaximizeVectorBandwidth(
2723 TargetTransformInfo::RegisterKind K) const override {
2724 return Impl.shouldMaximizeVectorBandwidth(K);
2725 }
2726 ElementCount getMinimumVF(unsigned ElemWidth,
2727 bool IsScalable) const override {
2728 return Impl.getMinimumVF(ElemWidth, IsScalable);
2729 }
2730 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2731 return Impl.getMaximumVF(ElemWidth, Opcode);
2732 }
2733 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2734 Type *ScalarValTy) const override {
2735 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2736 }
2737 bool shouldConsiderAddressTypePromotion(
2738 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2739 return Impl.shouldConsiderAddressTypePromotion(
2740 I, AllowPromotionWithoutCommonHeader);
2741 }
2742 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2743 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2744 return Impl.getCacheSize(Level);
2745 }
2746 std::optional<unsigned>
2747 getCacheAssociativity(CacheLevel Level) const override {
2748 return Impl.getCacheAssociativity(Level);
2749 }
2750
2751 std::optional<unsigned> getMinPageSize() const override {
2752 return Impl.getMinPageSize();
2753 }
2754
2755 /// Return the preferred prefetch distance in terms of instructions.
2756 ///
2757 unsigned getPrefetchDistance() const override {
2758 return Impl.getPrefetchDistance();
2759 }
2760
2761 /// Return the minimum stride necessary to trigger software
2762 /// prefetching.
2763 ///
2764 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2765 unsigned NumStridedMemAccesses,
2766 unsigned NumPrefetches,
2767 bool HasCall) const override {
2768 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2769 NumPrefetches, HasCall);
2770 }
2771
2772 /// Return the maximum prefetch distance in terms of loop
2773 /// iterations.
2774 ///
2775 unsigned getMaxPrefetchIterationsAhead() const override {
2776 return Impl.getMaxPrefetchIterationsAhead();
2777 }
2778
2779 /// \return True if prefetching should also be done for writes.
2780 bool enableWritePrefetching() const override {
2781 return Impl.enableWritePrefetching();
2782 }
2783
2784 /// \return True if the target wants to issue a prefetch in address space \p AS.
2785 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2786 return Impl.shouldPrefetchAddressSpace(AS);
2787 }
2788
2789 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2790 return Impl.getMaxInterleaveFactor(VF);
2791 }
2792 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2793 unsigned &JTSize,
2794 ProfileSummaryInfo *PSI,
2795 BlockFrequencyInfo *BFI) override {
2796 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2797 }
2798 InstructionCost getArithmeticInstrCost(
2799 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2800 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2801 ArrayRef<const Value *> Args,
2802 const Instruction *CxtI = nullptr) override {
2803 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2804 Args, CxtI);
2805 }
2806 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2807 unsigned Opcode1,
2808 const SmallBitVector &OpcodeMask,
2809 TTI::TargetCostKind CostKind) const override {
2810 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2811 }
2812
2813 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2814 ArrayRef<int> Mask,
2815 TTI::TargetCostKind CostKind, int Index,
2816 VectorType *SubTp,
2817 ArrayRef<const Value *> Args,
2818 const Instruction *CxtI) override {
2819 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
2820 CxtI);
2821 }
2822 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2823 CastContextHint CCH,
2824 TTI::TargetCostKind CostKind,
2825 const Instruction *I) override {
2826 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2827 }
2828 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2829 VectorType *VecTy,
2830 unsigned Index) override {
2831 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2832 }
2833 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2834 const Instruction *I = nullptr) override {
2835 return Impl.getCFInstrCost(Opcode, CostKind, I);
2836 }
2837 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2838 CmpInst::Predicate VecPred,
2839 TTI::TargetCostKind CostKind,
2840 OperandValueInfo Op1Info,
2841 OperandValueInfo Op2Info,
2842 const Instruction *I) override {
2843 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
2844 Op1Info, Op2Info, I);
2845 }
2846 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2847 TTI::TargetCostKind CostKind,
2848 unsigned Index, Value *Op0,
2849 Value *Op1) override {
2850 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2851 }
2852 InstructionCost getVectorInstrCost(
2853 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
2854 Value *Scalar,
2855 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) override {
2856 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar,
2857 ScalarUserAndIdx);
2858 }
2859 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2860 TTI::TargetCostKind CostKind,
2861 unsigned Index) override {
2862 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2863 }
2864 InstructionCost
2865 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2866 const APInt &DemandedDstElts,
2867 TTI::TargetCostKind CostKind) override {
2868 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2869 DemandedDstElts, CostKind);
2870 }
2871 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2872 unsigned AddressSpace,
2873 TTI::TargetCostKind CostKind,
2874 OperandValueInfo OpInfo,
2875 const Instruction *I) override {
2876 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2877 OpInfo, I);
2878 }
2879 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2880 unsigned AddressSpace,
2881 TTI::TargetCostKind CostKind,
2882 const Instruction *I) override {
2883 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2884 CostKind, I);
2885 }
2886 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2887 Align Alignment, unsigned AddressSpace,
2888 TTI::TargetCostKind CostKind) override {
2889 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2890 CostKind);
2891 }
2892 InstructionCost
2893 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2894 bool VariableMask, Align Alignment,
2895 TTI::TargetCostKind CostKind,
2896 const Instruction *I = nullptr) override {
2897 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2898 Alignment, CostKind, I);
2899 }
2900 InstructionCost
2901 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2902 bool VariableMask, Align Alignment,
2903 TTI::TargetCostKind CostKind,
2904 const Instruction *I = nullptr) override {
2905 return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
2906 Alignment, CostKind, I);
2907 }
2908 InstructionCost getInterleavedMemoryOpCost(
2909 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2910 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2911 bool UseMaskForCond, bool UseMaskForGaps) override {
2912 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2913 Alignment, AddressSpace, CostKind,
2914 UseMaskForCond, UseMaskForGaps);
2915 }
2916 InstructionCost
2917 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2918 std::optional<FastMathFlags> FMF,
2919 TTI::TargetCostKind CostKind) override {
2920 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2921 }
2922 InstructionCost
2923 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2924 TTI::TargetCostKind CostKind) override {
2925 return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
2926 }
2927 InstructionCost
2928 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2929 VectorType *Ty, FastMathFlags FMF,
2930 TTI::TargetCostKind CostKind) override {
2931 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2932 CostKind);
2933 }
2934 InstructionCost
2935 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2936 TTI::TargetCostKind CostKind) override {
2937 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2938 }
2939 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2940 TTI::TargetCostKind CostKind) override {
2941 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2942 }
2943 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2944 ArrayRef<Type *> Tys,
2945 TTI::TargetCostKind CostKind) override {
2946 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2947 }
2948 unsigned getNumberOfParts(Type *Tp) override {
2949 return Impl.getNumberOfParts(Tp);
2950 }
2951 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2952 const SCEV *Ptr) override {
2953 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2954 }
2955 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2956 return Impl.getCostOfKeepingLiveOverCall(Tys);
2957 }
2958 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2959 MemIntrinsicInfo &Info) override {
2960 return Impl.getTgtMemIntrinsic(Inst, Info);
2961 }
2962 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2963 return Impl.getAtomicMemIntrinsicMaxElementSize();
2964 }
2965 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2966 Type *ExpectedType) override {
2967 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2968 }
2969 Type *getMemcpyLoopLoweringType(
2970 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2971 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
2972 std::optional<uint32_t> AtomicElementSize) const override {
2973 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2974 DestAddrSpace, SrcAlign, DestAlign,
2975 AtomicElementSize);
2976 }
2977 void getMemcpyLoopResidualLoweringType(
2978 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2979 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2980 Align SrcAlign, Align DestAlign,
2981 std::optional<uint32_t> AtomicCpySize) const override {
2982 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2983 SrcAddrSpace, DestAddrSpace,
2984 SrcAlign, DestAlign, AtomicCpySize);
2985 }
2986 bool areInlineCompatible(const Function *Caller,
2987 const Function *Callee) const override {
2988 return Impl.areInlineCompatible(Caller, Callee);
2989 }
2990 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2991 unsigned DefaultCallPenalty) const override {
2992 return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
2993 }
2994 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2995 const ArrayRef<Type *> &Types) const override {
2996 return Impl.areTypesABICompatible(Caller, Callee, Types);
2997 }
2998 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2999 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
3000 }
3001 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
3002 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
3003 }
3004 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
3005 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
3006 }
3007 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
3008 return Impl.isLegalToVectorizeLoad(LI);
3009 }
3010 bool isLegalToVectorizeStore(StoreInst *SI) const override {
3011 return Impl.isLegalToVectorizeStore(SI);
3012 }
3013 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
3014 unsigned AddrSpace) const override {
3015 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
3016 AddrSpace);
3017 }
3018 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
3019 unsigned AddrSpace) const override {
3020 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
3021 AddrSpace);
3022 }
3023 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
3024 ElementCount VF) const override {
3025 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
3026 }
3027 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
3028 return Impl.isElementTypeLegalForScalableVector(Ty);
3029 }
3030 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
3031 unsigned ChainSizeInBytes,
3032 VectorType *VecTy) const override {
3033 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
3034 }
3035 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
3036 unsigned ChainSizeInBytes,
3037 VectorType *VecTy) const override {
3038 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
3039 }
3040 bool preferFixedOverScalableIfEqualCost() const override {
3041 return Impl.preferFixedOverScalableIfEqualCost();
3042 }
3043 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
3044 ReductionFlags Flags) const override {
3045 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
3046 }
3047 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
3048 ReductionFlags Flags) const override {
3049 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
3050 }
3051 bool preferEpilogueVectorization() const override {
3052 return Impl.preferEpilogueVectorization();
3053 }
3054
3055 bool shouldExpandReduction(const IntrinsicInst *II) const override {
3056 return Impl.shouldExpandReduction(II);
3057 }
3058
3059 ReductionShuffle
3060 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
3061 return Impl.getPreferredExpandedReductionShuffle(II);
3062 }
3063
3064 unsigned getGISelRematGlobalCost() const override {
3065 return Impl.getGISelRematGlobalCost();
3066 }
3067
3068 unsigned getMinTripCountTailFoldingThreshold() const override {
3069 return Impl.getMinTripCountTailFoldingThreshold();
3070 }
3071
3072 bool supportsScalableVectors() const override {
3073 return Impl.supportsScalableVectors();
3074 }
3075
3076 bool enableScalableVectorization() const override {
3077 return Impl.enableScalableVectorization();
3078 }
3079
3080 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
3081 Align Alignment) const override {
3082 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
3083 }
3084
3085 bool isProfitableToSinkOperands(Instruction *I,
3086 SmallVectorImpl<Use *> &Ops) const override {
3087 return Impl.isProfitableToSinkOperands(I, Ops);
3088 };
3089
3090 bool isVectorShiftByScalarCheap(Type *Ty) const override {
3091 return Impl.isVectorShiftByScalarCheap(Ty);
3092 }
3093
3094 VPLegalization
3095 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
3096 return Impl.getVPLegalizationStrategy(PI);
3097 }
3098
3099 bool hasArmWideBranch(bool Thumb) const override {
3100 return Impl.hasArmWideBranch(Thumb);
3101 }
3102
3103 unsigned getMaxNumArgs() const override {
3104 return Impl.getMaxNumArgs();
3105 }
3106
3107 unsigned getNumBytesToPadGlobalArray(unsigned Size,
3108 Type *ArrayType) const override {
3109 return Impl.getNumBytesToPadGlobalArray(Size, ArrayType);
3110 }
3111};
3112
3113template <typename T>
3114 TargetTransformInfo::TargetTransformInfo(T Impl)
3115 : TTIImpl(new Model<T>(Impl)) {}
3116
3117/// Analysis pass providing the \c TargetTransformInfo.
3118///
3119/// The core idea of the TargetIRAnalysis is to expose an interface through
3120/// which LLVM targets can analyze and provide information about the middle
3121/// end's target-independent IR. This supports use cases such as target-aware
3122/// cost modeling of IR constructs.
3123///
3124/// This is a function analysis because much of the cost modeling for targets
3125/// is done in a subtarget specific way and LLVM supports compiling different
3126/// functions targeting different subtargets in order to support runtime
3127/// dispatch according to the observed subtarget.
3128class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
3129public:
3130 typedef TargetTransformInfo Result;
3131
3132 /// Default construct a target IR analysis.
3133 ///
3134 /// This will use the module's datalayout to construct a baseline
3135 /// conservative TTI result.
3136 TargetIRAnalysis();
3137
3138 /// Construct an IR analysis pass around a target-provided callback.
3139 ///
3140 /// The callback will be called with a particular function for which the TTI
3141 /// is needed and must return a TTI object for that function.
3142 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
3143
3144 // Value semantics. We spell out the constructors for MSVC.
3145 TargetIRAnalysis(const TargetIRAnalysis &Arg)
3146 : TTICallback(Arg.TTICallback) {}
3147 TargetIRAnalysis(TargetIRAnalysis &&Arg)
3148 : TTICallback(std::move(Arg.TTICallback)) {}
3149 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
3150 TTICallback = RHS.TTICallback;
3151 return *this;
3152 }
3153 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
3154 TTICallback = std::move(RHS.TTICallback);
3155 return *this;
3156 }
3157
3158 Result run(const Function &F, FunctionAnalysisManager &);
3159
3160private:
3161 friend AnalysisInfoMixin<TargetIRAnalysis>;
3162 static AnalysisKey Key;
3163
3164 /// The callback used to produce a result.
3165 ///
3166 /// We use a completely opaque callback so that targets can provide whatever
3167 /// mechanism they desire for constructing the TTI for a given function.
3168 ///
3169 /// FIXME: Should we really use std::function? It's relatively inefficient.
3170 /// It might be possible to arrange for even stateful callbacks to outlive
3171 /// the analysis and thus use a function_ref which would be lighter weight.
3172 /// This may also be less error prone as the callback is likely to reference
3173 /// the external TargetMachine, and that reference needs to never dangle.
3174 std::function<Result(const Function &)> TTICallback;
3175
3176 /// Helper function used as the callback in the default constructor.
3177 static Result getDefaultTTI(const Function &F);
3178};
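
For orientation, here is a minimal sketch, kept separate from the header above, of how a new-pass-manager function pass typically consumes this analysis; the pass name ExamplePass and the particular query are hypothetical, only the AM.getResult<TargetIRAnalysis>(F) idiom is the point.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

namespace {
// Hypothetical pass; it exists only to show how the analysis is queried.
struct ExamplePass : llvm::PassInfoMixin<ExamplePass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    // The result is a TargetTransformInfo tailored to F's (sub)target,
    // produced by whatever callback the TargetIRAnalysis was built with.
    const llvm::TargetTransformInfo &TTI =
        AM.getResult<llvm::TargetIRAnalysis>(F);
    // Example query: the width of the target's fixed-size vector registers.
    llvm::TypeSize VecBits = TTI.getRegisterBitWidth(
        llvm::TargetTransformInfo::RGK_FixedWidthVector);
    (void)VecBits; // A real pass would use this to drive a transformation.
    return llvm::PreservedAnalyses::all();
  }
};
} // end anonymous namespace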
3179
3180/// Wrapper pass for TargetTransformInfo.
3181///
3182/// This pass can be constructed from a TTI object which it stores internally
3183/// and is queried by passes.
3184 class TargetTransformInfoWrapperPass : public ImmutablePass {
3185 TargetIRAnalysis TIRA;
3186 std::optional<TargetTransformInfo> TTI;
3187
3188 virtual void anchor();
3189
3190public:
3191 static char ID;
3192
3193 /// We must provide a default constructor for the pass but it should
3194 /// never be used.
3195 ///
3196 /// Use the constructor below or call one of the creation routines.
3197 TargetTransformInfoWrapperPass();
3198
3199 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
3200
3201 TargetTransformInfo &getTTI(const Function &F);
3202};
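
Under the legacy pass manager the same information is reached through this wrapper: a pass declares the dependency and asks the wrapper for a per-function TTI. A minimal sketch with a hypothetical pass name, assuming the pass is registered and scheduled in the usual way:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

namespace {
// Hypothetical legacy-PM pass; it exists only to show the getTTI() idiom.
struct ExampleLegacyPass : llvm::FunctionPass {
  static char ID;
  ExampleLegacyPass() : llvm::FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(llvm::Function &F) override {
    // The wrapper constructs a TargetTransformInfo for F on demand.
    llvm::TargetTransformInfo &TTI =
        getAnalysis<llvm::TargetTransformInfoWrapperPass>().getTTI(F);
    (void)TTI.getCacheLineSize(); // Query only; the IR is not modified.
    return false;
  }
};
char ExampleLegacyPass::ID = 0;
} // end anonymous namespace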
3203
3204/// Create an analysis pass wrapper around a TTI object.
3205///
3206/// This analysis pass just holds the TTI instance and makes it available to
3207/// clients.
3208 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
3209
3210} // namespace llvm
3211
3212#endif
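
To round out the picture, a sketch of how a driver could feed a target-aware TargetIRAnalysis into a legacy pipeline through the factory declared above; the helper name addTargetTTI is hypothetical and the TargetMachine is assumed to have been created elsewhere by the caller.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Target/TargetMachine.h"

// Hypothetical helper: wires a target-aware TTI wrapper into a legacy
// pipeline. With a default-constructed TargetIRAnalysis the wrapper would
// instead produce the baseline, DataLayout-only TTI described above.
void addTargetTTI(llvm::legacy::PassManagerBase &PM, llvm::TargetMachine &TM) {
  PM.add(llvm::createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
}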
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
RelocType Type
Definition: COFFYAML.cpp:410
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint32_t Index
uint64_t Size
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine InstCombiner
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
Value * RHS
Class for arbitrary precision integers.
Definition: APInt.h:78
an instruction to allocate memory on the stack
Definition: Instructions.h:63
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:395
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:281
The core instruction combiner logic.
Definition: InstCombiner.h:48
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Definition: DerivedTypes.h:42
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:622
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:292
Multiway switch.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
TargetTransformInfo & getTTI(const Function &F)
virtual bool preferFixedOverScalableIfEqualCost() const =0
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
virtual const DataLayout & getDataLayout() const =0
virtual InstructionCost getBranchMispredictPenalty()=0
virtual bool isProfitableLSRChainElement(Instruction *I)=0
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, OperandValueInfo Op1Info, OperandValueInfo Op2Info, const Instruction *I)=0
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I)=0
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace)=0
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
virtual bool enableOrderedReductions()=0
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual bool shouldPrefetchAddressSpace(unsigned AS) const =0
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
virtual unsigned getMinVectorRegisterBitWidth() const =0
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const =0
virtual std::optional< unsigned > getVScaleForTuning() const =0
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)=0
virtual bool supportsEfficientVectorElementLoadStore()=0
virtual unsigned getRegUsageForType(Type *Ty)=0
virtual bool hasArmWideBranch(bool Thumb) const =0
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID)=0
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)=0
virtual bool shouldBuildLookupTables()=0
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType)=0
virtual bool isVectorShiftByScalarCheap(Type *Ty) const =0
virtual unsigned getGISelRematGlobalCost() const =0
virtual unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const =0
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const =0
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
virtual bool supportsTailCallFor(const CallBase *CB)=0
virtual std::optional< unsigned > getMaxVScale() const =0
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
virtual unsigned getMaxNumArgs() const =0
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
virtual bool enableWritePrefetching() const =0
virtual bool useColdCCForColdCall(Function &F)=0
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const =0
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual int getInlinerVectorBonusPercent() const =0
virtual unsigned getMaxPrefetchIterationsAhead() const =0
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual unsigned getCacheLineSize() const =0
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const =0
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)=0
virtual bool isProfitableToHoist(Instruction *I)=0
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind, ArrayRef< Value * > VL={})=0
virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getFPOpCost(Type *Ty)=0
virtual unsigned getMinTripCountTailFoldingThreshold() const =0
virtual bool enableMaskedInterleavedAccessVectorization()=0
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
virtual bool isTypeLegal(Type *Ty)=0
virtual BranchProbability getPredictableBranchThreshold()=0
virtual bool enableScalableVectorization() const =0
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual const char * getRegisterClassName(unsigned ClassID) const =0
virtual unsigned getMaxInterleaveFactor(ElementCount VF)=0
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
virtual bool haveFastSqrt(Type *Ty)=0
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)=0
virtual std::optional< unsigned > getCacheSize(CacheLevel Level) const =0
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)=0
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
virtual std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
virtual bool supportsScalableVectors() const =0
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const =0
virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)=0
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
virtual unsigned getNumberOfParts(Type *Tp)=0
virtual bool isLegalICmpImmediate(int64_t Imm)=0
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true)=0
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
virtual bool shouldBuildRelLookupTables()=0
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TargetCostKind CostKind)=0
virtual bool isLoweredToCall(const Function *F)=0
virtual bool isSourceOfDivergence(const Value *V)=0
virtual bool isLegalAddScalableImmediate(int64_t Imm)=0
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const =0
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
virtual unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const =0
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual bool isFPVectorizationPotentiallyUnsafe()=0
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
virtual bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const =0
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &OpsToSink) const =0
virtual bool hasBranchDivergence(const Function *F=nullptr)=0
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
virtual unsigned getInliningThresholdMultiplier() const =0
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)=0
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)=0
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
virtual bool shouldDropLSRSolutionIfLessProfitable() const =0
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
virtual bool prefersVectorizedAddressing()=0
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const =0
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args, const Instruction *CxtI)=0
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, OperandValueInfo OpInfo, const Instruction *I)=0
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
virtual bool isAlwaysUniform(const Value *V)=0
virtual std::optional< unsigned > getMinPageSize() const =0
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const =0
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
virtual unsigned getEpilogueVectorizationMinVF()=0
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual unsigned getFlatAddressSpace()=0
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)=0
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx)=0
virtual unsigned getPrefetchDistance() const =0
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I)=0
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const =0
virtual bool isNumRegsMajorCostOfLSR()=0
virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment)=0
virtual bool isSingleThreaded() const =0
virtual bool isLegalAddImmediate(int64_t Imm)=0
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I, int64_t ScalableOffset)=0
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
virtual bool isVScaleKnownToBeAPowerOfTwo() const =0
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
virtual bool enableInterleavedAccessVectorization()=0
virtual bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx)=0
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
virtual bool preferEpilogueVectorization() const =0
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual int getInliningLastCallToStaticBonus() const =0
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast)=0
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const =0
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
bool isLegalToVectorizeLoad(LoadInst *LI) const
std::optional< unsigned > getVScaleForTuning() const
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Return false if a AS0 address cannot possibly alias a AS1 address.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool isLegalToVectorizeStore(StoreInst *SI) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
bool isAlwaysUniform(const Value *V) const
unsigned getAssumedAddrSpace(const Value *V) const
bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize=std::nullopt) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instructions unique non-constant operands.
bool supportsTailCallFor(const CallBase *CB) const
If target supports tail call on CB.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
bool isProfitableLSRChainElement(Instruction *I) const
TypeSize getRegisterBitWidth(RegisterKind K) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
std::optional< unsigned > getMaxVScale() const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are t...
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
unsigned getAtomicMemIntrinsicMaxElementSize() const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting...
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
Query the target what the preferred style of tail folding is.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType=nullptr, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const
Return true is the target supports interleaved access for the given vector type VTy,...
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
\Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
unsigned getMaxPrefetchIterationsAhead() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
Return true if the target supports strided load.
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and ran.
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
OperandValueProperties
Additional properties of an operand's values.
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const PointersChainInfo &Info, Type *AccessTy, TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Estimate the cost of a chain of pointers (typically pointer operands of a chain of loads or stores wi...
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned adjustInliningThreshold(const CallBase *CB) const
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
unsigned getMinVectorRegisterBitWidth() const
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
It can be advantageous to detach complex constants from their uses to make their generation cheaper.
bool hasArmWideBranch(bool Thumb) const
const char * getRegisterClassName(unsigned ClassID) const
bool preferEpilogueVectorization() const
Return true if the loop vectorizer should consider vectorizing an otherwise scalar epilogue loop.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
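The sketch below shows one way a client might combine the legality and "fast" answers from this hook; the 128-bit width, address space 0, and helper name are assumptions for illustration.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Illustrative helper: accept a misaligned 128-bit access only if the
// target both allows it and reports it as fast.
static bool canUseUnaligned128(LLVMContext &Ctx,
                               const TargetTransformInfo &TTI) {
  unsigned Fast = 0;
  bool Allowed = TTI.allowsMisalignedMemoryAccesses(
      Ctx, /*BitWidth=*/128, /*AddressSpace=*/0, Align(1), &Fast);
  return Allowed && Fast != 0;
}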
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
unsigned getEpilogueVectorizationMinVF() const
PopcntSupportKind
Flags indicating the kind of support for population count.
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
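A minimal sketch of how an LSR-style client could branch on the answer; the helper name is illustrative, and the AMK_PostIndexed enumerator is assumed to come from this interface's AddressingModeKind enum.

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustrative helper: report whether the target prefers post-indexed
// addressing for this loop.
static bool prefersPostIndexed(const Loop *L, ScalarEvolution *SE,
                               const TargetTransformInfo &TTI) {
  return TTI.getPreferredAddressingMode(L, SE) ==
         TargetTransformInfo::AMK_PostIndexed;
}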
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
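A minimal sketch of issuing this query from a transform, assuming the required analyses (ScalarEvolution, AssumptionCache, TargetLibraryInfo) have already been obtained and that HardwareLoopInfo is constructible from the Loop as declared in this header; the helper name is illustrative.

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustrative helper: seed a HardwareLoopInfo for the loop and ask the
// target whether converting it to a hardware loop would be profitable.
static bool wantsHardwareLoop(Loop *L, ScalarEvolution &SE,
                              AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                              const TargetTransformInfo &TTI) {
  HardwareLoopInfo HWLoopInfo(L);
  return TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}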
unsigned getInliningThresholdMultiplier() const
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
unsigned getNumberOfRegisters(unsigned ClassID) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on the target.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist an instruction from the then/else blocks to before the if.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile variant.
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
std::optional< unsigned > getMinPageSize() const
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because the semantics of vector and scalar floating-point operations may differ.
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
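Together with isLegalMaskedLoad (listed further below), a vectorizer-style client might gate predicated memory accesses as in this hedged sketch; the helper name and its parameters are placeholders.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Illustrative helper: only plan masked loads/stores when the target
// supports both forms for this element type and alignment.
static bool canUseMaskedMemOps(Type *DataTy, Align Alignment,
                               const TargetTransformInfo &TTI) {
  return TTI.isLegalMaskedLoad(DataTy, Alignment) &&
         TTI.isLegalMaskedStore(DataTy, Alignment);
}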
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
std::optional< unsigned > getCacheSize(CacheLevel Level) const
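A hedged sketch of reading the L1 data cache parameters via getCacheSize and getCacheAssociativity (listed earlier); both return std::nullopt when the target does not describe that cache level. The printing helper and the L1D enumerator spelling are assumptions for illustration.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

// Illustrative helper: dump the L1D size and associativity if the target
// models them.
static void printL1DInfo(const TargetTransformInfo &TTI, raw_ostream &OS) {
  using CacheLevel = TargetTransformInfo::CacheLevel;
  if (std::optional<unsigned> Size = TTI.getCacheSize(CacheLevel::L1D))
    OS << "L1D size: " << *Size << " bytes\n";
  if (std::optional<unsigned> Assoc =
          TTI.getCacheAssociativity(CacheLevel::L1D))
    OS << "L1D associativity: " << *Assoc << "\n";
}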
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructions which can add a register with the scalable immediate without having to materialize it into a register.
bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
Identifies if the vector form of the intrinsic has a scalar operand.
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Returns the cost estimation for alternating opcode pattern that can be lowered to a single instruction on the target.
TargetCostConstants
Underlying constants for 'cost' values in this interface.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={}) const
Estimate the overhead of scalarizing an instruction.
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
unsigned getMinTripCountTailFoldingThreshold() const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
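For instance, a size-oriented heuristic might sum this cost over a basic block as in the sketch below; the helper name and the TCK_SizeAndLatency choice are assumptions.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Illustrative helper: accumulate the modelled cost of every instruction
// in a block, passing each instruction's operands through to the hook.
static InstructionCost getBlockCost(const BasicBlock &BB,
                                    const TargetTransformInfo &TTI) {
  InstructionCost Cost = 0;
  for (const Instruction &I : BB) {
    SmallVector<const Value *, 4> Operands(I.operand_values());
    Cost += TTI.getInstructionCost(&I, Operands,
                                   TargetTransformInfo::TCK_SizeAndLatency);
  }
  return Cost;
}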
unsigned getMaxInterleaveFactor(ElementCount VF) const
bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount which will vary across each lane.
bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
unsigned getGISelRematGlobalCost() const
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
MemIndexedMode
The type of load/store indexing.
@ MIM_PostInc
Post-incrementing.
@ MIM_PostDec
Post-decrementing.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling convention.
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
bool supportsTailCalls() const
If the target supports tail calls.
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
unsigned getNumberOfParts(Type *Tp) const
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target, for a load/store of the specified type.
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e.g. because the operands can be folded into a target-specific instruction during instruction selection.
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize=std::nullopt) const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
Query the target whether it would be preferred to create a predicated vector loop, which can avoid the need to emit a scalar epilogue loop.
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdIdx.
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
bool shouldExpandReduction(const IntrinsicInst *II) const
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
Returns the maximum memset / memcpy size in bytes that still makes it profitable to inline the call.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
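A minimal sketch of consuming the result; real callers (the generic peeling logic) seed the struct with their own defaults before this call, which the zero-initialisation below merely stands in for, and the helper name is illustrative.

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustrative helper: ask the target to adjust peeling preferences and
// read back any forced peel count.
static unsigned getTargetPeelCount(Loop *L, ScalarEvolution &SE,
                                   const TargetTransformInfo &TTI) {
  TargetTransformInfo::PeelingPreferences PP = {};
  TTI.getPeelingPreferences(L, SE, PP);
  return PP.PeelCount;
}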
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
OperandValueKind
Additional information about an operand's possible values.
CacheLevel
The possible cache levels.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a signed Int and unsigned UInt variant.
Definition: MsgPackReader.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
@ None
Definition: CodeGenData.h:106
AtomicOrdering
Atomic ordering for LLVM's memory model.
TargetTransformInfo TTI
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements it with an icmp instruction.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:92
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Definition: Analysis.h:28
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
TargetLibraryInfo * TLI
LoopVectorizationLegality * LVL
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelProfiledIterations
Allow peeling based on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before the loop body).
Describe known properties for a set of pointers.
unsigned IsKnownStride
True if the distance between any two neighbouring pointers is a known value.
unsigned IsUnitStride
These properties are only valid if IsSameBaseAddress is set.
unsigned IsSameBaseAddress
All the GEPs in a set have same base address.
Flags describing the kind of vector reduction.
bool IsSigned
Whether the operation is a signed int reduction.
bool IsMaxOp
If the op is a min/max kind, true if it's a max operation.
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loop body).
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain percentage to allow more aggressive complete unrolling.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll profitability.
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate compare and branch overhead).
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for runtime unrolling.
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)