TargetTransformInfo.h
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/IR/FMF.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/AtomicOrdering.h"
30#include "llvm/Support/BranchProbability.h"
31#include "llvm/Support/InstructionCost.h"
32#include <functional>
33#include <optional>
34#include <utility>
35
36namespace llvm {
37
38namespace Intrinsic {
39typedef unsigned ID;
40}
41
42class AssumptionCache;
44class DominatorTree;
45class BranchInst;
46class CallBase;
47class Function;
48class GlobalValue;
49class InstCombiner;
52class IntrinsicInst;
53class LoadInst;
54class Loop;
55class LoopInfo;
59class SCEV;
60class ScalarEvolution;
61class StoreInst;
62class SwitchInst;
64class Type;
65class User;
66class Value;
67class VPIntrinsic;
68struct KnownBits;
69
70/// Information about a load/store intrinsic defined by the target.
71struct MemIntrinsicInfo {
72 /// This is the pointer that the intrinsic is loading from or storing to.
73 /// If this is non-null, then analysis/optimization passes can assume that
74 /// this intrinsic is functionally equivalent to a load/store from this
75 /// pointer.
76 Value *PtrVal = nullptr;
77
78 // Ordering for atomic operations.
 80 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
81 // Same Id is set by the target for corresponding load/store intrinsics.
82 unsigned short MatchingId = 0;
83
84 bool ReadMem = false;
85 bool WriteMem = false;
86 bool IsVolatile = false;
87
 88 bool isUnordered() const {
 89 return (Ordering == AtomicOrdering::NotAtomic ||
 90 Ordering == AtomicOrdering::Unordered) &&
 91 !IsVolatile;
 92 }
93};
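// Editorial illustration (not part of the original header): a minimal sketch of
// how an IR-level pass might consume this struct through TTI::getTgtMemIntrinsic,
// assuming a TargetTransformInfo object `TTI` and an IntrinsicInst `*II` are in
// scope; `processAsMemoryAccess` is a hypothetical helper.
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered())
//     // II behaves like an ordinary load/store of Info.PtrVal.
//     processAsMemoryAccess(II, Info.PtrVal, Info.ReadMem, Info.WriteMem);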
94
95/// Attributes of a target dependent hardware loop.
96struct HardwareLoopInfo {
 97 HardwareLoopInfo() = delete;
99 Loop *L = nullptr;
102 const SCEV *ExitCount = nullptr;
104 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
105 // value in every iteration.
106 bool IsNestingLegal = false; // Can a hardware loop be a parent to
107 // another hardware loop?
108 bool CounterInReg = false; // Should loop counter be updated in
109 // the loop via a phi?
110 bool PerformEntryTest = false; // Generate the intrinsic which also performs
111 // icmp ne zero on the loop counter value and
112 // produces an i1 to guard the loop entry.
 113 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
 114 DominatorTree &DT, bool ForceNestedLoop = false,
115 bool ForceHardwareLoopPHI = false);
116 bool canAnalyze(LoopInfo &LI);
117};
118
119class IntrinsicCostAttributes {
 120 const IntrinsicInst *II = nullptr;
121 Type *RetTy = nullptr;
122 Intrinsic::ID IID;
 123 SmallVector<Type *, 4> ParamTys;
 124 SmallVector<const Value *, 4> Arguments;
125 FastMathFlags FMF;
126 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
127 // arguments and the return value will be computed based on types.
128 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129
130public:
132 Intrinsic::ID Id, const CallBase &CI,
134 bool TypeBasedOnly = false);
135
137 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
138 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
140
143
147 const IntrinsicInst *I = nullptr,
149
150 Intrinsic::ID getID() const { return IID; }
151 const IntrinsicInst *getInst() const { return II; }
152 Type *getReturnType() const { return RetTy; }
153 FastMathFlags getFlags() const { return FMF; }
154 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
156 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
157
158 bool isTypeBasedOnly() const {
159 return Arguments.empty();
160 }
161
162 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
163};
164
165enum class TailFoldingStyle {
 166 /// Don't use tail folding
167 None,
168 /// Use predicate only to mask operations on data in the loop.
 169 /// When the VL is not known to be a power-of-2, this method requires a
 170 /// runtime overflow check for the i + VL increment in the loop, because it
 171 /// compares the scalar induction variable against the trip count rounded up
 172 /// to a multiple of VL, which may overflow. When the VL is a power-of-2, both
 173 /// the incremented induction variable and the rounded-up trip count wrap to 0,
 174 /// so no runtime check is needed: the loop exits when the induction variable
 175 /// equals the rounded-up trip count, i.e. when both are 0.
 176 Data,
177 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
178 /// calculate the mask and instead implements this with a
179 /// splat/stepvector/cmp.
180 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
 181 /// active.lane.mask intrinsic when it is not natively supported?
 182 DataWithoutLaneMask,
183 /// Use predicate to control both data and control flow.
184 /// This method always requires a runtime overflow check for the i + VL
 185 /// increment inside the loop, because it uses the result directly in the
 186 /// active.lane.mask to calculate the mask for the next iteration. If the
 187 /// increment overflows, the mask is no longer correct.
 188 DataAndControlFlow,
189 /// Use predicate to control both data and control flow, but modify
190 /// the trip count so that a runtime overflow check can be avoided
 191 /// and such that the scalar epilogue loop can always be removed.
 192 DataAndControlFlowWithoutRuntimeCheck
193};
194
195class TargetTransformInfo;
197
198/// This pass provides access to the codegen interfaces that are needed
199/// for IR-level transformations.
200class TargetTransformInfo {
 201public:
202 /// Construct a TTI object using a type implementing the \c Concept
203 /// API below.
204 ///
205 /// This is used by targets to construct a TTI wrapping their target-specific
206 /// implementation that encodes appropriate costs for their target.
207 template <typename T> TargetTransformInfo(T Impl);
208
209 /// Construct a baseline TTI object using a minimal implementation of
210 /// the \c Concept API below.
211 ///
212 /// The TTI implementation will reflect the information in the DataLayout
213 /// provided if non-null.
214 explicit TargetTransformInfo(const DataLayout &DL);
215
216 // Provide move semantics.
219
220 // We need to define the destructor out-of-line to define our sub-classes
221 // out-of-line.
223
224 /// Handle the invalidation of this information.
225 ///
226 /// When used as a result of \c TargetIRAnalysis this method will be called
227 /// when the function this was computed for changes. When it returns false,
228 /// the information is preserved across those changes.
231 // FIXME: We should probably in some way ensure that the subtarget
232 // information for a function hasn't changed.
233 return false;
234 }
235
236 /// \name Generic Target Information
237 /// @{
238
239 /// The kind of cost model.
240 ///
241 /// There are several different cost models that can be customized by the
242 /// target. The normalization of each cost model may be target specific.
243 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
244 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
 245 enum TargetCostKind {
 246 TCK_RecipThroughput, ///< Reciprocal throughput.
247 TCK_Latency, ///< The latency of instruction.
248 TCK_CodeSize, ///< Instruction code size.
249 TCK_SizeAndLatency ///< The weighted sum of size and latency.
250 };
251
252 /// Underlying constants for 'cost' values in this interface.
253 ///
254 /// Many APIs in this interface return a cost. This enum defines the
255 /// fundamental values that should be used to interpret (and produce) those
256 /// costs. The costs are returned as an int rather than a member of this
257 /// enumeration because it is expected that the cost of one IR instruction
258 /// may have a multiplicative factor to it or otherwise won't fit directly
259 /// into the enum. Moreover, it is common to sum or average costs which works
260 /// better as simple integral values. Thus this enum only provides constants.
261 /// Also note that the returned costs are signed integers to make it natural
262 /// to add, subtract, and test with zero (a common boundary condition). It is
263 /// not expected that 2^32 is a realistic cost to be modeling at any point.
264 ///
265 /// Note that these costs should usually reflect the intersection of code-size
266 /// cost and execution cost. A free instruction is typically one that folds
267 /// into another instruction. For example, reg-to-reg moves can often be
268 /// skipped by renaming the registers in the CPU, but they still are encoded
269 /// and thus wouldn't be considered 'free' here.
 270 enum TargetCostConstants {
 271 TCC_Free = 0, ///< Expected to fold away in lowering.
272 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
273 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
274 };
275
276 /// Estimate the cost of a GEP operation when lowered.
278 getGEPCost(Type *PointeeType, const Value *Ptr,
281
 282 /// Describe known properties for a set of pointers.
 283 struct PointersChainInfo {
 284 /// All the GEPs in a set have the same base address.
 285 unsigned IsSameBaseAddress : 1;
 286 /// These properties are only valid if IsSameBaseAddress is set.
 287 /// True if the distance between any two neighbouring pointers is the same value.
 288 unsigned IsUniformStride : 1;
 289 /// True if the distance between any two neighbouring pointers is a known value.
 290 unsigned IsKnownStride : 1;
291 unsigned Reserved : 29;
292
293 bool isSameBase() const { return IsSameBaseAddress; }
294 bool isUniformStride() const {
296 }
298
300 return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/1,
301 /*IsKnownStride=*/1, 0};
302 }
304 return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/1,
305 /*IsKnownStride=*/0, 0};
306 }
308 return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/0,
309 /*IsKnownStride=*/1, 0};
310 }
312 return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/0,
313 /*IsKnownStride=*/0, 0};
314 }
315 };
316 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
317
 318 /// Estimate the cost, when lowered, of the set of operations on a chain of
 319 /// pointers (typically the pointer operands of a chain of loads or stores within the same block).
322 const PointersChainInfo &Info,
324
325 ) const;
326
327 /// \returns A value by which our inlining threshold should be multiplied.
328 /// This is primarily used to bump up the inlining threshold wholesale on
329 /// targets where calls are unusually expensive.
330 ///
331 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
332 /// individual classes of instructions would be better.
333 unsigned getInliningThresholdMultiplier() const;
334
335 /// \returns A value to be added to the inlining threshold.
336 unsigned adjustInliningThreshold(const CallBase *CB) const;
337
338 /// \returns Vector bonus in percent.
339 ///
340 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
341 /// and apply this bonus based on the percentage of vector instructions. A
342 /// bonus is applied if the vector instructions exceed 50% and half that
 343 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
 344 /// arbitrary and evolved over time by accident as much as because they are
 345 /// principled bonuses.
 346 /// FIXME: It would be nice to base the bonus values on something more
 347 /// scientific. A target may have no bonus on vector instructions.
349
350 /// \return the expected cost of a memcpy, which could e.g. depend on the
351 /// source/destination type and alignment and the number of bytes copied.
353
354 /// \return The estimated number of case clusters when lowering \p 'SI'.
355 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
356 /// table.
358 unsigned &JTSize,
360 BlockFrequencyInfo *BFI) const;
361
362 /// Estimate the cost of a given IR user when lowered.
363 ///
364 /// This can estimate the cost of either a ConstantExpr or Instruction when
365 /// lowered.
366 ///
367 /// \p Operands is a list of operands which can be a result of transformations
 368 /// of the current operands. The number of operands on the list must equal
 369 /// the number of the current operands the IR user has. Their order on the
370 /// list must be the same as the order of the current operands the IR user
371 /// has.
372 ///
373 /// The returned cost is defined in terms of \c TargetCostConstants, see its
374 /// comments for a detailed explanation of the cost values.
378
379 /// This is a helper function which calls the three-argument
380 /// getInstructionCost with \p Operands which are the current operands U has.
382 TargetCostKind CostKind) const {
383 SmallVector<const Value *, 4> Operands(U->operand_values());
385 }
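 // Editorial illustration (not part of the original header): a minimal usage
 // sketch, assuming a TargetTransformInfo object `TTI` and an Instruction `*I`
 // are in scope; the comparison against TCC_Expensive is illustrative only.
 //
 //   InstructionCost Cost =
 //       TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
 //   bool TooExpensive =
 //       Cost.isValid() && Cost >= TargetTransformInfo::TCC_Expensive;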
386
387 /// If a branch or a select condition is skewed in one direction by more than
388 /// this factor, it is very likely to be predicted correctly.
390
391 /// Return true if branch divergence exists.
392 ///
393 /// Branch divergence has a significantly negative impact on GPU performance
394 /// when threads in the same wavefront take different paths due to conditional
395 /// branches.
396 bool hasBranchDivergence() const;
397
398 /// Return true if the target prefers to use GPU divergence analysis to
399 /// replace the legacy version.
400 bool useGPUDivergenceAnalysis() const;
401
402 /// Returns whether V is a source of divergence.
403 ///
404 /// This function provides the target-dependent information for
405 /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
406 /// first builds the dependency graph, and then runs the reachability
407 /// algorithm starting with the sources of divergence.
408 bool isSourceOfDivergence(const Value *V) const;
409
 410 // Returns true for the target-specific
 411 // set of operations which produce a uniform result
 412 // even when taking non-uniform arguments.
413 bool isAlwaysUniform(const Value *V) const;
414
415 /// Returns the address space ID for a target's 'flat' address space. Note
416 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
417 /// refers to as the generic address space. The flat address space is a
 418 /// generic address space that can be used to access multiple segments of memory
419 /// with different address spaces. Access of a memory location through a
420 /// pointer with this address space is expected to be legal but slower
421 /// compared to the same memory location accessed through a pointer with a
422 /// different address space.
423 //
424 /// This is for targets with different pointer representations which can
425 /// be converted with the addrspacecast instruction. If a pointer is converted
426 /// to this address space, optimizations should attempt to replace the access
427 /// with the source address space.
428 ///
429 /// \returns ~0u if the target does not have such a flat address space to
430 /// optimize away.
431 unsigned getFlatAddressSpace() const;
432
433 /// Return any intrinsic address operand indexes which may be rewritten if
434 /// they use a flat address space pointer.
435 ///
436 /// \returns true if the intrinsic was handled.
438 Intrinsic::ID IID) const;
439
440 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
441
442 /// Return true if globals in this address space can have initializers other
443 /// than `undef`.
445
446 unsigned getAssumedAddrSpace(const Value *V) const;
447
448 bool isSingleThreaded() const;
449
450 std::pair<const Value *, unsigned>
451 getPredicatedAddrSpace(const Value *V) const;
452
453 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
454 /// NewV, which has a different address space. This should happen for every
455 /// operand index that collectFlatAddressOperands returned for the intrinsic.
456 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
457 /// new value (which may be the original \p II with modified operands).
459 Value *NewV) const;
460
461 /// Test whether calls to a function lower to actual program function
462 /// calls.
463 ///
464 /// The idea is to test whether the program is likely to require a 'call'
465 /// instruction or equivalent in order to call the given function.
466 ///
 467 /// FIXME: It's not clear that this is a good or useful query API. Clients
468 /// should probably move to simpler cost metrics using the above.
469 /// Alternatively, we could split the cost interface into distinct code-size
470 /// and execution-speed costs. This would allow modelling the core of this
471 /// query more accurately as a call is a single small instruction, but
472 /// incurs significant execution cost.
473 bool isLoweredToCall(const Function *F) const;
474
475 struct LSRCost {
476 /// TODO: Some of these could be merged. Also, a lexical ordering
477 /// isn't always optimal.
478 unsigned Insns;
479 unsigned NumRegs;
480 unsigned AddRecCost;
481 unsigned NumIVMuls;
482 unsigned NumBaseAdds;
483 unsigned ImmCost;
484 unsigned SetupCost;
485 unsigned ScaleCost;
486 };
487
488 /// Parameters that control the generic loop unrolling transformation.
490 /// The cost threshold for the unrolled loop. Should be relative to the
491 /// getInstructionCost values returned by this API, and the expectation is
492 /// that the unrolled loop's instructions when run through that interface
493 /// should not exceed this cost. However, this is only an estimate. Also,
494 /// specific loops may be unrolled even with a cost above this threshold if
495 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
496 /// restriction.
497 unsigned Threshold;
498 /// If complete unrolling will reduce the cost of the loop, we will boost
499 /// the Threshold by a certain percent to allow more aggressive complete
500 /// unrolling. This value provides the maximum boost percentage that we
501 /// can apply to Threshold (The value should be no less than 100).
502 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
503 /// MaxPercentThresholdBoost / 100)
504 /// E.g. if complete unrolling reduces the loop execution time by 50%
505 /// then we boost the threshold by the factor of 2x. If unrolling is not
506 /// expected to reduce the running time, then we do not increase the
507 /// threshold.
509 /// The cost threshold for the unrolled loop when optimizing for size (set
510 /// to UINT_MAX to disable).
512 /// The cost threshold for the unrolled loop, like Threshold, but used
513 /// for partial/runtime unrolling (set to UINT_MAX to disable).
515 /// The cost threshold for the unrolled loop when optimizing for size, like
516 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
517 /// UINT_MAX to disable).
519 /// A forced unrolling factor (the number of concatenated bodies of the
520 /// original loop in the unrolled loop body). When set to 0, the unrolling
521 /// transformation will select an unrolling factor based on the current cost
522 /// threshold and other factors.
523 unsigned Count;
524 /// Default unroll count for loops with run-time trip count.
526 // Set the maximum unrolling factor. The unrolling factor may be selected
527 // using the appropriate cost threshold, but may not exceed this number
528 // (set to UINT_MAX to disable). This does not apply in cases where the
529 // loop is being fully unrolled.
530 unsigned MaxCount;
531 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
532 /// applies even if full unrolling is selected. This allows a target to fall
533 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
535 // Represents number of instructions optimized when "back edge"
536 // becomes "fall through" in unrolled loop.
537 // For now we count a conditional branch on a backedge and a comparison
538 // feeding it.
539 unsigned BEInsns;
540 /// Allow partial unrolling (unrolling of loops to expand the size of the
541 /// loop body, not only to eliminate small constant-trip-count loops).
543 /// Allow runtime unrolling (unrolling of loops to expand the size of the
544 /// loop body even when the number of loop iterations is not known at
545 /// compile time).
547 /// Allow generation of a loop remainder (extra iterations after unroll).
549 /// Allow emitting expensive instructions (such as divisions) when computing
550 /// the trip count of a loop for runtime unrolling.
552 /// Apply loop unroll on any kind of loop
553 /// (mainly to loops that fail runtime unrolling).
554 bool Force;
555 /// Allow using trip count upper bound to unroll loops.
557 /// Allow unrolling of all the iterations of the runtime loop remainder.
559 /// Allow unroll and jam. Used to enable unroll and jam for the target.
561 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
562 /// value above is used during unroll and jam for the outer loop size.
563 /// This value is used in the same manner to limit the size of the inner
564 /// loop.
566 /// Don't allow loop unrolling to simulate more than this number of
567 /// iterations when checking full unroll profitability
569 };
570
571 /// Get target-customized preferences for the generic loop unrolling
572 /// transformation. The caller will initialize UP with the current
573 /// target-independent defaults.
576 OptimizationRemarkEmitter *ORE) const;
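 // Editorial illustration (not part of the original header): a hypothetical
 // target hook might adjust the defaults roughly as follows (the class name
 // MyTTIImpl and the numbers are made up for the sketch):
 //
 //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 //                                           TTI::UnrollingPreferences &UP,
 //                                           OptimizationRemarkEmitter *ORE) {
 //     UP.Threshold = 300; // tolerate somewhat larger unrolled bodies
 //     UP.Partial = true;  // allow partial unrolling
 //     UP.Runtime = true;  // allow runtime-trip-count unrolling
 //   }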
577
578 /// Query the target whether it would be profitable to convert the given loop
579 /// into a hardware loop.
582 HardwareLoopInfo &HWLoopInfo) const;
583
 584 /// Query the target whether it would be preferred to create a predicated
585 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
588 DominatorTree *DT,
590 InterleavedAccessInfo *IAI) const;
591
592 /// Query the target what the preferred style of tail folding is.
593 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
594 /// may (or will never) overflow for the suggested VF/UF in the given loop.
595 /// Targets can use this information to select a more optimal tail folding
596 /// style. The value conservatively defaults to true, such that no assumptions
597 /// are made on overflow.
599 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
600
 601 // Parameters that control the loop peeling transformation
 602 struct PeelingPreferences {
 603 /// A forced peeling factor (the number of bodies of the original loop
 604 /// that should be peeled off before the loop body). When set to 0, a
 605 /// peeling factor is selected based on profile information and other factors.
606 unsigned PeelCount;
607 /// Allow peeling off loop iterations.
609 /// Allow peeling off loop iterations for loop nests.
 611 /// Allow peeling based on profile. Used to enable peeling off all
 612 /// iterations based on the provided profile.
613 /// If the value is true the peeling cost model can decide to peel only
614 /// some iterations and in this case it will set this to false.
616 };
617
618 /// Get target-customized preferences for the generic loop peeling
619 /// transformation. The caller will initialize \p PP with the current
620 /// target-independent defaults with information from \p L and \p SE.
622 PeelingPreferences &PP) const;
623
624 /// Targets can implement their own combinations for target-specific
625 /// intrinsics. This function will be called from the InstCombine pass every
626 /// time a target-specific intrinsic is encountered.
627 ///
628 /// \returns std::nullopt to not do anything target specific or a value that
 629 /// will be returned from the InstCombiner. It is also possible to stop
 630 /// further processing of the intrinsic by returning nullptr.
631 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
632 IntrinsicInst & II) const;
633 /// Can be used to implement target-specific instruction combining.
634 /// \see instCombineIntrinsic
635 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
636 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
637 KnownBits & Known, bool &KnownBitsComputed) const;
638 /// Can be used to implement target-specific instruction combining.
639 /// \see instCombineIntrinsic
640 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
641 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
642 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
643 std::function<void(Instruction *, unsigned, APInt, APInt &)>
644 SimplifyAndSetOp) const;
645 /// @}
646
647 /// \name Scalar Target Information
648 /// @{
649
650 /// Flags indicating the kind of support for population count.
651 ///
652 /// Compared to the SW implementation, HW support is supposed to
653 /// significantly boost the performance when the population is dense, and it
 654 /// may or may not degrade performance if the population is sparse. HW
 655 /// support is considered "Fast" if it can outperform, or is on a par
 656 /// with, the SW implementation when the population is sparse; otherwise, it is
 657 /// considered "Slow".
 658 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
 659
 660 /// Return true if the specified immediate is a legal add immediate, that
 661 /// is, the target has add instructions which can add a register with the
662 /// immediate without having to materialize the immediate into a register.
663 bool isLegalAddImmediate(int64_t Imm) const;
664
 665 /// Return true if the specified immediate is a legal icmp immediate,
 666 /// that is, the target has icmp instructions which can compare a register
667 /// against the immediate without having to materialize the immediate into a
668 /// register.
669 bool isLegalICmpImmediate(int64_t Imm) const;
670
671 /// Return true if the addressing mode represented by AM is legal for
672 /// this target, for a load/store of the specified type.
673 /// The type may be VoidTy, in which case only return true if the addressing
674 /// mode is legal for a load/store of any legal type.
675 /// If target returns true in LSRWithInstrQueries(), I may be valid.
676 /// TODO: Handle pre/postinc as well.
677 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
678 bool HasBaseReg, int64_t Scale,
679 unsigned AddrSpace = 0,
680 Instruction *I = nullptr) const;
681
682 /// Return true if LSR cost of C1 is lower than C2.
684 const TargetTransformInfo::LSRCost &C2) const;
685
 686 /// Return true if LSR's major cost is the number of registers. Targets which
 687 /// implement their own isLSRCostLess and unset number of registers as major
 688 /// cost should return false; otherwise return true.
689 bool isNumRegsMajorCostOfLSR() const;
690
691 /// \returns true if LSR should not optimize a chain that includes \p I.
693
694 /// Return true if the target can fuse a compare and branch.
695 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
696 /// calculation for the instructions in a loop.
697 bool canMacroFuseCmp() const;
698
 699 /// Return true if the target can save a compare for loop count; for example,
 700 /// a hardware loop saves a compare.
701 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
703 TargetLibraryInfo *LibInfo) const;
704
 704
 705 enum AddressingModeKind {
 706 AMK_PreIndexed,
 707 AMK_PostIndexed,
 708 AMK_None
 709 };
710
711 /// Return the preferred addressing mode LSR should make efforts to generate.
713 ScalarEvolution *SE) const;
714
715 /// Return true if the target supports masked store.
716 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
717 /// Return true if the target supports masked load.
718 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
719
720 /// Return true if the target supports nontemporal store.
721 bool isLegalNTStore(Type *DataType, Align Alignment) const;
722 /// Return true if the target supports nontemporal load.
723 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
724
 725 /// \returns true if the target supports broadcasting a load to a vector of
726 /// type <NumElements x ElementTy>.
727 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
728
729 /// Return true if the target supports masked scatter.
730 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
731 /// Return true if the target supports masked gather.
732 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
733 /// Return true if the target forces scalarizing of llvm.masked.gather
734 /// intrinsics.
735 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
736 /// Return true if the target forces scalarizing of llvm.masked.scatter
737 /// intrinsics.
738 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
739
740 /// Return true if the target supports masked compress store.
741 bool isLegalMaskedCompressStore(Type *DataType) const;
742 /// Return true if the target supports masked expand load.
743 bool isLegalMaskedExpandLoad(Type *DataType) const;
744
745 /// Return true if this is an alternating opcode pattern that can be lowered
746 /// to a single instruction on the target. In X86 this is for the addsub
 747 /// instruction which corresponds to a Shuffle + FAdd + FSub pattern in IR.
 748 /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
 749 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
 750 /// when \p Opcode0 is selected and a `1` when \p Opcode1 is selected.
751 /// \p VecTy is the vector type of the instruction to be generated.
752 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
753 const SmallBitVector &OpcodeMask) const;
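 // Editorial illustration (not part of the original header): for a 4-lane
 // alternating pattern with \p Opcode0 = FAdd and \p Opcode1 = FSub, the mask
 // has a bit set in every lane where FSub (Opcode1) is selected, e.g.
 // (assuming a VectorType `*VecTy` with 4 elements and a TargetTransformInfo
 // object `TTI` are in scope):
 //
 //   SmallBitVector OpcodeMask(4); // all lanes initially select Opcode0
 //   OpcodeMask.set(1);
 //   OpcodeMask.set(3);            // lanes 1 and 3 select Opcode1
 //   bool Legal = TTI.isLegalAltInstr(VecTy, Instruction::FAdd,
 //                                    Instruction::FSub, OpcodeMask);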
754
755 /// Return true if we should be enabling ordered reductions for the target.
756 bool enableOrderedReductions() const;
757
758 /// Return true if the target has a unified operation to calculate division
759 /// and remainder. If so, the additional implicit multiplication and
760 /// subtraction required to calculate a remainder from division are free. This
761 /// can enable more aggressive transformations for division and remainder than
762 /// would typically be allowed using throughput or size cost models.
763 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
764
765 /// Return true if the given instruction (assumed to be a memory access
766 /// instruction) has a volatile variant. If that's the case then we can avoid
767 /// addrspacecast to generic AS for volatile loads/stores. Default
768 /// implementation returns false, which prevents address space inference for
769 /// volatile loads/stores.
770 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
771
772 /// Return true if target doesn't mind addresses in vectors.
773 bool prefersVectorizedAddressing() const;
774
775 /// Return the cost of the scaling factor used in the addressing
776 /// mode represented by AM for this target, for a load/store
777 /// of the specified type.
778 /// If the AM is supported, the return value must be >= 0.
779 /// If the AM is not supported, it returns a negative value.
780 /// TODO: Handle pre/postinc as well.
782 int64_t BaseOffset, bool HasBaseReg,
783 int64_t Scale,
784 unsigned AddrSpace = 0) const;
785
786 /// Return true if the loop strength reduce pass should make
787 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
788 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
789 /// immediate offset and no index register.
790 bool LSRWithInstrQueries() const;
791
792 /// Return true if it's free to truncate a value of type Ty1 to type
793 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
794 /// by referencing its sub-register AX.
795 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
796
 797 /// Return true if it is profitable to hoist an instruction from the
 798 /// then/else blocks to before the if.
799 bool isProfitableToHoist(Instruction *I) const;
800
801 bool useAA() const;
802
803 /// Return true if this type is legal.
804 bool isTypeLegal(Type *Ty) const;
805
806 /// Returns the estimated number of registers required to represent \p Ty.
807 unsigned getRegUsageForType(Type *Ty) const;
808
809 /// Return true if switches should be turned into lookup tables for the
810 /// target.
811 bool shouldBuildLookupTables() const;
812
813 /// Return true if switches should be turned into lookup tables
814 /// containing this constant value for the target.
816
817 /// Return true if lookup tables should be turned into relative lookup tables.
818 bool shouldBuildRelLookupTables() const;
819
 820 /// Return true if the input function, which is cold at all call sites,
 821 /// should use the coldcc calling convention.
822 bool useColdCCForColdCall(Function &F) const;
823
824 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
825 /// are set if the demanded result elements need to be inserted and/or
826 /// extracted from vectors.
828 const APInt &DemandedElts,
829 bool Insert, bool Extract,
831
 832 /// Estimate the overhead of scalarizing an instruction's unique
 833 /// non-constant operands. The (potentially vector) types to use for each of
 834 /// its arguments are passed via Tys.
839
840 /// If target has efficient vector element load/store instructions, it can
841 /// return true here so that insertion/extraction costs are not added to
842 /// the scalarization cost of a load/store.
844
845 /// If the target supports tail calls.
846 bool supportsTailCalls() const;
847
848 /// If target supports tail call on \p CB
849 bool supportsTailCallFor(const CallBase *CB) const;
850
851 /// Don't restrict interleaved unrolling to small loops.
852 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
853
854 /// Returns options for expansion of memcmp. IsZeroCmp is
 855 // true if this is the expansion of memcmp(p1, p2, s) == 0.
 856 struct MemCmpExpansionOptions {
857 // Return true if memcmp expansion is enabled.
858 operator bool() const { return MaxNumLoads > 0; }
859
860 // Maximum number of load operations.
861 unsigned MaxNumLoads = 0;
862
863 // The list of available load sizes (in bytes), sorted in decreasing order.
 865 SmallVector<unsigned, 8> LoadSizes;
866 // For memcmp expansion when the memcmp result is only compared equal or
867 // not-equal to 0, allow up to this number of load pairs per block. As an
868 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
869 // a0 = load2bytes &a[0]
870 // b0 = load2bytes &b[0]
871 // a2 = load1byte &a[2]
872 // b2 = load1byte &b[2]
873 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
874 unsigned NumLoadsPerBlock = 1;
875
876 // Set to true to allow overlapping loads. For example, 7-byte compares can
877 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
878 // requires all loads in LoadSizes to be doable in an unaligned way.
 879 bool AllowOverlappingLoads = false;
 880 };
 881 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
 882 bool IsZeroCmp) const;
883
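 // Editorial illustration (not part of the original header): a hypothetical
 // target hook might fill the options roughly as follows (MyTTIImpl and the
 // concrete values are made up for the sketch):
 //
 //   TTI::MemCmpExpansionOptions
 //   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 //     TTI::MemCmpExpansionOptions Options;
 //     Options.MaxNumLoads = OptSize ? 2 : 8;
 //     Options.LoadSizes = {8, 4, 2, 1}; // decreasing order, in bytes
 //     if (IsZeroCmp)
 //       Options.NumLoadsPerBlock = 2;
 //     return Options;
 //   }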
 884 /// Should the Select Optimization pass be enabled and run.
885 bool enableSelectOptimize() const;
886
887 /// Enable matching of interleaved access groups.
889
 890 /// Enable matching of interleaved access groups that contain predicated
 891 /// accesses or gaps and are therefore vectorized using masked
 892 /// vector loads/stores.
894
895 /// Indicate that it is potentially unsafe to automatically vectorize
 896 /// floating-point operations because vector and scalar
 897 /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
898 /// does not support IEEE-754 denormal numbers, while depending on the
899 /// platform, scalar floating-point math does.
900 /// This applies to floating-point math operations and calls, not memory
901 /// operations, shuffles, or casts.
903
904 /// Determine if the target supports unaligned memory accesses.
906 unsigned AddressSpace = 0,
907 Align Alignment = Align(1),
908 unsigned *Fast = nullptr) const;
909
910 /// Return hardware support for population count.
911 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
912
913 /// Return true if the hardware has a fast square-root instruction.
914 bool haveFastSqrt(Type *Ty) const;
915
916 /// Return true if the cost of the instruction is too high to speculatively
917 /// execute and should be kept behind a branch.
918 /// This normally just wraps around a getInstructionCost() call, but some
919 /// targets might report a low TCK_SizeAndLatency value that is incompatible
920 /// with the fixed TCC_Expensive value.
921 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
923
924 /// Return true if it is faster to check if a floating-point value is NaN
925 /// (or not-NaN) versus a comparison against a constant FP zero value.
926 /// Targets should override this if materializing a 0.0 for comparison is
927 /// generally as cheap as checking for ordered/unordered.
928 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
929
930 /// Return the expected cost of supporting the floating point operation
931 /// of the specified type.
933
934 /// Return the expected cost of materializing for the given integer
935 /// immediate of the specified type.
936 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
938
939 /// Return the expected cost of materialization for the given integer
940 /// immediate of the specified type for a given instruction. The cost can be
941 /// zero if the immediate can be folded into the specified instruction.
942 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
943 const APInt &Imm, Type *Ty,
945 Instruction *Inst = nullptr) const;
947 const APInt &Imm, Type *Ty,
949
950 /// Return the expected cost for the given integer when optimising
 951 /// for size. This is different from the other integer immediate cost
 952 /// functions in that it is subtarget agnostic. This is useful when you e.g.
 953 /// target one ISA such as AArch32 but smaller encodings could be possible
 954 /// with another such as Thumb. This return value is used as a penalty when
 955 /// the total cost for a constant is calculated (the bigger the cost, the
956 /// more beneficial constant hoisting is).
957 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
958 const APInt &Imm, Type *Ty) const;
959 /// @}
960
961 /// \name Vector Target Information
962 /// @{
963
964 /// The various kinds of shuffle patterns for vector queries.
 965 enum ShuffleKind {
 966 SK_Broadcast, ///< Broadcast element 0 to all other elements.
967 SK_Reverse, ///< Reverse the order of the vector.
968 SK_Select, ///< Selects elements from the corresponding lane of
969 ///< either source operand. This is equivalent to a
970 ///< vector select with a constant condition operand.
971 SK_Transpose, ///< Transpose two vectors.
972 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
973 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
974 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
975 ///< with any shuffle mask.
976 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
977 ///< shuffle mask.
 978 SK_Splice ///< Concatenates elements from the first input vector
 979 ///< with elements of the second input vector, returning
 980 ///< a vector of the same type as the input vectors.
 981 ///< Index indicates the start offset in the first input vector.
982 };
983
984 /// Additional information about an operand's possible values.
 985 enum OperandValueKind {
 986 OK_AnyValue, // Operand can have any value.
 987 OK_UniformValue, // Operand is uniform (splat of a value).
 988 OK_UniformConstantValue, // Operand is uniform constant.
 989 OK_NonUniformConstantValue // Operand is a non-uniform constant value.
990 };
991
992 /// Additional properties of an operand's values.
 993 enum OperandValueProperties {
 994 OP_None = 0,
 995 OP_PowerOf2,
 996 OP_NegatedPowerOf2,
 997 };
998
999 // Describe the values an operand can take. We're in the process
1000 // of migrating uses of OperandValueKind and OperandValueProperties
1001 // to use this class, and then will change the internal representation.
 1002 struct OperandValueInfo {
 1003 OperandValueKind Kind = OK_AnyValue;
 1004 OperandValueProperties Properties = OP_None;
 1005
 1006 bool isConstant() const {
 1007 return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
 1008 }
 1009 bool isUniform() const {
 1010 return Kind == OK_UniformValue || Kind == OK_UniformConstantValue;
 1011 }
 1012 bool isPowerOf2() const {
 1013 return Properties == OP_PowerOf2;
 1014 }
 1015 bool isNegatedPowerOf2() const {
 1016 return Properties == OP_NegatedPowerOf2;
 1017 }
 1018
 1019 OperandValueInfo getNoProps() const {
 1020 return {Kind, OP_None};
 1021 }
 1022 };
1023
1024 /// \return the number of registers in the target-provided register class.
1025 unsigned getNumberOfRegisters(unsigned ClassID) const;
1026
1027 /// \return the target-provided register class ID for the provided type,
1028 /// accounting for type promotion and other type-legalization techniques that
1029 /// the target might apply. However, it specifically does not account for the
1030 /// scalarization or splitting of vector types. Should a vector type require
1031 /// scalarization or splitting into multiple underlying vector registers, that
1032 /// type should be mapped to a register class containing no registers.
1033 /// Specifically, this is designed to provide a simple, high-level view of the
1034 /// register allocation later performed by the backend. These register classes
1035 /// don't necessarily map onto the register classes used by the backend.
1036 /// FIXME: It's not currently possible to determine how many registers
1037 /// are used by the provided type.
1038 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1039
1040 /// \return the target-provided register class name
1041 const char *getRegisterClassName(unsigned ClassID) const;
1042
1044
1045 /// \return The width of the largest scalar or vector register type.
1047
1048 /// \return The width of the smallest vector register type.
1049 unsigned getMinVectorRegisterBitWidth() const;
1050
1051 /// \return The maximum value of vscale if the target specifies an
1052 /// architectural maximum vector length, and std::nullopt otherwise.
1053 std::optional<unsigned> getMaxVScale() const;
1054
1055 /// \return the value of vscale to tune the cost model for.
1056 std::optional<unsigned> getVScaleForTuning() const;
1057
1058 /// \return True if the vectorization factor should be chosen to
1059 /// make the vector of the smallest element type match the size of a
1060 /// vector register. For wider element types, this could result in
1061 /// creating vectors that span multiple vector registers.
1062 /// If false, the vectorization factor will be chosen based on the
1063 /// size of the widest element type.
1064 /// \p K Register Kind for vectorization.
1066
1067 /// \return The minimum vectorization factor for types of given element
1068 /// bit width, or 0 if there is no minimum VF. The returned value only
1069 /// applies when shouldMaximizeVectorBandwidth returns true.
1070 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1071 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1072
1073 /// \return The maximum vectorization factor for types of given element
1074 /// bit width and opcode, or 0 if there is no maximum VF.
1075 /// Currently only used by the SLP vectorizer.
1076 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1077
1078 /// \return The minimum vectorization factor for the store instruction. Given
1079 /// the initial estimation of the minimum vector factor and store value type,
1080 /// it tries to find possible lowest VF, which still might be profitable for
1081 /// the vectorization.
1082 /// \param VF Initial estimation of the minimum vector factor.
1083 /// \param ScalarMemTy Scalar memory type of the store operation.
1084 /// \param ScalarValTy Scalar type of the stored value.
1085 /// Currently only used by the SLP vectorizer.
1086 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1087 Type *ScalarValTy) const;
1088
1089 /// \return True if it should be considered for address type promotion.
1090 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1091 /// profitable without finding other extensions fed by the same input.
1093 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1094
1095 /// \return The size of a cache line in bytes.
1096 unsigned getCacheLineSize() const;
1097
1098 /// The possible cache levels
1099 enum class CacheLevel {
1100 L1D, // The L1 data cache
1101 L2D, // The L2 data cache
1102
1103 // We currently do not model L3 caches, as their sizes differ widely between
1104 // microarchitectures. Also, we currently do not have a use for L3 cache
1105 // size modeling yet.
1106 };
1107
1108 /// \return The size of the cache level in bytes, if available.
1109 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1110
1111 /// \return The associativity of the cache level, if available.
1112 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1113
1114 /// \return How much before a load we should place the prefetch
1115 /// instruction. This is currently measured in number of
1116 /// instructions.
1117 unsigned getPrefetchDistance() const;
1118
1119 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1120 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1121 /// and the arguments provided are meant to serve as a basis for deciding this
1122 /// for a particular loop.
1123 ///
1124 /// \param NumMemAccesses Number of memory accesses in the loop.
1125 /// \param NumStridedMemAccesses Number of the memory accesses that
1126 /// ScalarEvolution could find a known stride
1127 /// for.
1128 /// \param NumPrefetches Number of software prefetches that will be
1129 /// emitted as determined by the addresses
1130 /// involved and the cache line size.
1131 /// \param HasCall True if the loop contains a call.
1132 ///
1133 /// \return This is the minimum stride in bytes where it makes sense to start
1134 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1135 /// stride.
1136 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1137 unsigned NumStridedMemAccesses,
1138 unsigned NumPrefetches, bool HasCall) const;
1139
1140 /// \return The maximum number of iterations to prefetch ahead. If
1141 /// the required number of iterations is more than this number, no
1142 /// prefetching is performed.
1143 unsigned getMaxPrefetchIterationsAhead() const;
1144
1145 /// \return True if prefetching should also be done for writes.
1146 bool enableWritePrefetching() const;
1147
 1148 /// \return true if the target wants to issue a prefetch in address space \p AS.
1149 bool shouldPrefetchAddressSpace(unsigned AS) const;
1150
1151 /// \return The maximum interleave factor that any transform should try to
1152 /// perform for this target. This number depends on the level of parallelism
1153 /// and the number of execution units in the CPU.
1154 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1155
1156 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1157 static OperandValueInfo getOperandInfo(const Value *V);
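 // Editorial illustration (not part of the original header): a minimal sketch
 // of how operand info typically feeds an arithmetic cost query, assuming a
 // BinaryOperator `*BO` and a TargetTransformInfo object `TTI` are in scope.
 //
 //   TargetTransformInfo::OperandValueInfo Op1Info =
 //       TargetTransformInfo::getOperandInfo(BO->getOperand(0));
 //   TargetTransformInfo::OperandValueInfo Op2Info =
 //       TargetTransformInfo::getOperandInfo(BO->getOperand(1));
 //   InstructionCost Cost = TTI.getArithmeticInstrCost(
 //       BO->getOpcode(), BO->getType(), TargetTransformInfo::TCK_RecipThroughput,
 //       Op1Info, Op2Info, /*Args=*/std::nullopt, /*CxtI=*/BO);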
1158
1159 /// This is an approximation of reciprocal throughput of a math/logic op.
1160 /// A higher cost indicates less expected throughput.
1161 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1162 /// clock cycles per instruction when the instructions are not part of a
1163 /// limiting dependency chain."
1164 /// Therefore, costs should be scaled to account for multiple execution units
1165 /// on the target that can process this type of instruction. For example, if
1166 /// there are 5 scalar integer units and 2 vector integer units that can
1167 /// calculate an 'add' in a single cycle, this model should indicate that the
1168 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1169 /// add instruction.
1170 /// \p Args is an optional argument which holds the instruction operands
1171 /// values so the TTI can analyze those values searching for special
1172 /// cases or optimizations based on those values.
1173 /// \p CxtI is the optional original context instruction, if one exists, to
1174 /// provide even more information.
1176 unsigned Opcode, Type *Ty,
1179 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1180 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
1181 const Instruction *CxtI = nullptr) const;
1182
1183 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1184 /// The exact mask may be passed as Mask, or else the array will be empty.
1185 /// The index and subtype parameters are used by the subvector insertion and
1186 /// extraction shuffle kinds to show the insert/extract point and the type of
1187 /// the subvector being inserted/extracted. The operands of the shuffle can be
1188 /// passed through \p Args, which helps improve the cost estimation in some
1189 /// cases, like in broadcast loads.
1190 /// NOTE: For subvector extractions Tp represents the source type.
1191 InstructionCost
1193 ArrayRef<int> Mask = std::nullopt,
1195 int Index = 0, VectorType *SubTp = nullptr,
1196 ArrayRef<const Value *> Args = std::nullopt) const;
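 // Editorial illustration (not part of the original header): for instance, the
 // cost of extracting the upper half of an 8-element vector could be queried
 // roughly as follows (a sketch, assuming `Src` is the 8-element VectorType,
 // `Half` the matching 4-element VectorType, and `TTI` a TargetTransformInfo):
 //
 //   InstructionCost Cost = TTI.getShuffleCost(
 //       TargetTransformInfo::SK_ExtractSubvector, Src, /*Mask=*/std::nullopt,
 //       TargetTransformInfo::TCK_RecipThroughput, /*Index=*/4, /*SubTp=*/Half);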
1197
1198 /// Represents a hint about the context in which a cast is used.
1199 ///
1200 /// For zext/sext, the context of the cast is the operand, which must be a
 1201 /// load of some kind. For trunc, the context of the cast is the single
1202 /// user of the instruction, which must be a store of some kind.
1203 ///
1204 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1205 /// type of cast it's dealing with, as not every cast is equal. For instance,
 1206 /// the zext of a load may be free, but the zext of an interleaved load can
 1207 /// be (very) expensive!
1208 ///
1209 /// See \c getCastContextHint to compute a CastContextHint from a cast
1210 /// Instruction*. Callers can use it if they don't need to override the
1211 /// context and just want it to be calculated from the instruction.
1212 ///
1213 /// FIXME: This handles the types of load/store that the vectorizer can
1214 /// produce, which are the cases where the context instruction is most
1215 /// likely to be incorrect. There are other situations where that can happen
1216 /// too, which might be handled here but in the long run a more general
 1217 /// solution of costing multiple instructions at the same time may be better.
1218 enum class CastContextHint : uint8_t {
1219 None, ///< The cast is not used with a load/store of any kind.
1220 Normal, ///< The cast is used with a normal load/store.
1221 Masked, ///< The cast is used with a masked load/store.
1222 GatherScatter, ///< The cast is used with a gather/scatter.
1223 Interleave, ///< The cast is used with an interleaved load/store.
1224 Reversed, ///< The cast is used with a reversed load/store.
1225 };
1226
1227 /// Calculates a CastContextHint from \p I.
1228 /// This should be used by callers of getCastInstrCost if they wish to
1229 /// determine the context from some instruction.
1230 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1231 /// or if it's another type of cast.
 1232 static CastContextHint getCastContextHint(const Instruction *I);
 1233
1234 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1235 /// zext, etc. If there is an existing instruction that holds Opcode, it
1236 /// may be passed in the 'I' parameter.
1238 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1241 const Instruction *I = nullptr) const;
1242
1243 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1244 /// Index = -1 to indicate that there is no information about the index value.
1245 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1246 VectorType *VecTy,
1247 unsigned Index) const;
1248
1249 /// \return The expected cost of control-flow related instructions such as
1250 /// Phi, Ret, Br, Switch.
1252 getCFInstrCost(unsigned Opcode,
1254 const Instruction *I = nullptr) const;
1255
1256 /// \returns The expected cost of compare and select instructions. If there
1257 /// is an existing instruction that holds Opcode, it may be passed in the
1258 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1259 /// is using a compare with the specified predicate as condition. When vector
1260 /// types are passed, \p VecPred must be used for all lanes.
1262 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1263 CmpInst::Predicate VecPred,
1265 const Instruction *I = nullptr) const;
1266
1267 /// \return The expected cost of vector Insert and Extract.
1268 /// Use -1 to indicate that there is no information on the index value.
1269 /// This is used when the instruction is not available; a typical use
1270 /// case is to provision the cost of vectorization/scalarization in
1271 /// vectorizer passes.
1272 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1274 unsigned Index = -1, Value *Op0 = nullptr,
1275 Value *Op1 = nullptr) const;
1276
1277 /// \return The expected cost of vector Insert and Extract.
 1278 /// This is used when the instruction is available, and the implementation
 1279 /// asserts that 'I' is not nullptr.
 1280 ///
 1281 /// A typical suitable use case is cost estimation when the vector instruction
 1282 /// exists (e.g., from basic blocks during transformation).
1285 unsigned Index = -1) const;
1286
1287 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1288 /// \p ReplicationFactor times.
1289 ///
1290 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1291 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1292 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1293 int VF,
1294 const APInt &DemandedDstElts,
1296
1297 /// \return The cost of Load and Store instructions.
1299 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1300 unsigned AddressSpace,
1302 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1303 const Instruction *I = nullptr) const;
1304
1305 /// \return The cost of VP Load and Store instructions.
1306 InstructionCost
1307 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1308 unsigned AddressSpace,
1310 const Instruction *I = nullptr) const;
1311
1312 /// \return The cost of masked Load and Store instructions.
1314 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1316
1317 /// \return The cost of Gather or Scatter operation
1318 /// \p Opcode - is a type of memory access Load or Store
1319 /// \p DataTy - a vector type of the data to be loaded or stored
1320 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1321 /// \p VariableMask - true when the memory access is predicated with a mask
1322 /// that is not a compile-time constant
1323 /// \p Alignment - alignment of single element
1324 /// \p I - the optional original context instruction, if one exists, e.g. the
1325 /// load/store to transform or the call to the gather/scatter intrinsic
1327 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1329 const Instruction *I = nullptr) const;
1330
1331 /// \return The cost of the interleaved memory operation.
1332 /// \p Opcode is the memory operation code
1333 /// \p VecTy is the vector type of the interleaved access.
1334 /// \p Factor is the interleave factor
1335 /// \p Indices is the indices for interleaved load members (as interleaved
1336 /// load allows gaps)
1337 /// \p Alignment is the alignment of the memory operation
1338 /// \p AddressSpace is address space of the pointer.
1339 /// \p UseMaskForCond indicates if the memory access is predicated.
1340 /// \p UseMaskForGaps indicates if gaps should be masked.
1342 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1343 Align Alignment, unsigned AddressSpace,
1345 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1346
1347 /// A helper function to determine the type of reduction algorithm used
1348 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1349 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1350 return FMF && !(*FMF).allowReassoc();
1351 }
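 // Editorial illustration (not part of the original header): e.g. a caller
 // costing a floating-point add reduction with flags `FMF`
 // (a std::optional<FastMathFlags>) can use this helper to choose between the
 // two reduction shapes documented below:
 //
 //   bool Ordered = TargetTransformInfo::requiresOrderedReduction(FMF);
 //   // Ordered == true  -> cost as an ordered, lane-by-lane reduction.
 //   // Ordered == false -> cost as a tree-wise reduction.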
1352
1353 /// Calculate the cost of vector reduction intrinsics.
1354 ///
1355 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1356 /// value using the operation denoted by \p Opcode. The FastMathFlags
1357 /// parameter \p FMF indicates what type of reduction we are performing:
1358 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1359 /// involves successively splitting a vector into half and doing the
1360 /// operation on the pair of halves until you have a scalar value. For
1361 /// example:
1362 /// (v0, v1, v2, v3)
1363 /// ((v0+v2), (v1+v3), undef, undef)
1364 /// ((v0+v2+v1+v3), undef, undef, undef)
1365 /// This is the default behaviour for integer operations, whereas for
1366 /// floating point we only do this if \p FMF indicates that
1367 /// reassociation is allowed.
1368 /// 2. Ordered. For a vector with N elements this involves performing N
1369 /// operations in lane order, starting with an initial scalar value, i.e.
1370 /// result = InitVal + v0
1371 /// result = result + v1
1372 /// result = result + v2
1373 /// result = result + v3
1374 /// This is only the case for FP operations and when reassociation is not
1375 /// allowed.
1376 ///
1377 InstructionCost getArithmeticReductionCost(
1378 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1379 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1380
1381 InstructionCost getMinMaxReductionCost(
1382 VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1383 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1384
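  // Illustrative sketch (assumption): costing a reassociable fadd reduction of
  // a <4 x float> value. `TTI` and `Ctx` are assumed.
  //
  //   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  //   FastMathFlags FMF;
  //   FMF.setAllowReassoc();
  //   InstructionCost RdxCost =
  //       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, FMF);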
1385 /// Calculate the cost of an extended reduction pattern, similar to
1386 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1387 /// extensions. This is the cost of:
1388 /// ResTy vecreduce.add(mul (A, B)).
1389 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1390 InstructionCost getMulAccReductionCost(
1391 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1392 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1393
1394 /// Calculate the cost of an extended reduction pattern, similar to
1395 /// getArithmeticReductionCost of a reduction with an extension.
1396 /// This is the cost of:
1397 /// ResTy vecreduce.opcode(ext(Ty A)).
1398 InstructionCost getExtendedReductionCost(
1399 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1400 std::optional<FastMathFlags> FMF,
1401 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1402
1403 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1404 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1405 /// 3. scalar instruction which is to be vectorized.
1406 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1407 TTI::TargetCostKind CostKind) const;
1408
1409 /// \returns The cost of Call instructions.
1410 InstructionCost getCallInstrCost(
1411 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1412 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1413
1414 /// \returns The number of pieces into which the provided type must be
1415 /// split during legalization. Zero is returned when the answer is unknown.
1416 unsigned getNumberOfParts(Type *Tp) const;
1417
1418 /// \returns The cost of the address computation. For most targets this can be
1419 /// merged into the instruction indexing mode. Some targets might want to
1420 /// distinguish between address computation for memory operations on vector
1421 /// types and scalar types. Such targets should override this function.
1422 /// The 'SE' parameter holds a pointer to the ScalarEvolution object, which
1423 /// is used to get the step value of \p Ptr in the case of a constant stride.
1424 /// The 'Ptr' parameter holds the SCEV of the access pointer.
1425 InstructionCost getAddressComputationCost(Type *Ty,
1426 ScalarEvolution *SE = nullptr,
1427 const SCEV *Ptr = nullptr) const;
1428
1429 /// \returns The cost, if any, of keeping values of the given types alive
1430 /// over a callsite.
1431 ///
1432 /// Some types may require the use of register classes that do not have
1433 /// any callee-saved registers, so would require a spill and fill.
1434 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1435
1436 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1437 /// will contain additional information - whether the intrinsic may write
1438 /// or read to memory, volatility and the pointer. Info is undefined
1439 /// if false is returned.
1440 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1441
1442 /// \returns The maximum element size, in bytes, for an element
1443 /// unordered-atomic memory intrinsic.
1444 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1445
1446 /// \returns A value which is the result of the given memory intrinsic. New
1447 /// instructions may be created to extract the result from the given intrinsic
1448 /// memory operation. Returns nullptr if the target cannot create a result
1449 /// from the given intrinsic.
1450 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1451 Type *ExpectedType) const;
1452
1453 /// \returns The type to use in a loop expansion of a memcpy call.
1454 Type *getMemcpyLoopLoweringType(
1455 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1456 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1457 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1458
1459 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1460 /// \param RemainingBytes The number of bytes to copy.
1461 ///
1462 /// Calculates the operand types to use when copying \p RemainingBytes of
1463 /// memory, where source and destination alignments are \p SrcAlign and
1464 /// \p DestAlign respectively.
1465 void getMemcpyLoopResidualLoweringType(
1466 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1467 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1468 unsigned SrcAlign, unsigned DestAlign,
1469 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
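  // A small sketch (assumption): choosing the per-iteration operand type for a
  // memcpy expansion. `TTI`, `Ctx` and `Length` are assumed to exist in the
  // caller.
  //
  //   Type *OpTy = TTI.getMemcpyLoopLoweringType(
  //       Ctx, Length, /*SrcAddrSpace=*/0, /*DestAddrSpace=*/0,
  //       /*SrcAlign=*/4, /*DestAlign=*/4);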
1470
1471 /// \returns True if the two functions have compatible attributes for inlining
1472 /// purposes.
1473 bool areInlineCompatible(const Function *Caller,
1474 const Function *Callee) const;
1475
1476 /// \returns True if the caller and callee agree on how \p Types will be
1477 /// passed to or returned from the callee.
1479 /// \param Types List of types to check.
1480 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1481 const ArrayRef<Type *> &Types) const;
1482
1483 /// The type of load/store indexing.
1484 enum MemIndexedMode {
1485 MIM_Unindexed, ///< No indexing.
1486 MIM_PreInc, ///< Pre-incrementing.
1487 MIM_PreDec, ///< Pre-decrementing.
1488 MIM_PostInc, ///< Post-incrementing.
1489 MIM_PostDec ///< Post-decrementing.
1490 };
1491
1492 /// \returns True if the specified indexed load for the given type is legal.
1493 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1494
1495 /// \returns True if the specified indexed store for the given type is legal.
1496 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
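  // A small sketch (assumption): checking whether a post-incrementing i32 load
  // is legal before forming that pattern in an IR-level transform. `TTI` and
  // `Ctx` are assumed.
  //
  //   if (TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc,
  //                              Type::getInt32Ty(Ctx)))
  //     ...; // keep the pointer increment fused with the load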
1497
1498 /// \returns The bitwidth of the largest vector type that should be used to
1499 /// load/store in the given address space.
1500 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1501
1502 /// \returns True if the load instruction is legal to vectorize.
1503 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1504
1505 /// \returns True if the store instruction is legal to vectorize.
1506 bool isLegalToVectorizeStore(StoreInst *SI) const;
1507
1508 /// \returns True if it is legal to vectorize the given load chain.
1509 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1510 unsigned AddrSpace) const;
1511
1512 /// \returns True if it is legal to vectorize the given store chain.
1513 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1514 unsigned AddrSpace) const;
1515
1516 /// \returns True if it is legal to vectorize the given reduction kind.
1517 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1518 ElementCount VF) const;
1519
1520 /// \returns True if the given type is supported for scalable vectors
1521 bool isElementTypeLegalForScalableVector(Type *Ty) const;
1522
1523 /// \returns The new vector factor value if the target doesn't support \p
1524 /// SizeInBytes loads or has a better vector factor.
1525 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1526 unsigned ChainSizeInBytes,
1527 VectorType *VecTy) const;
1528
1529 /// \returns The new vector factor value if the target doesn't support \p
1530 /// SizeInBytes stores or has a better vector factor.
1531 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1532 unsigned ChainSizeInBytes,
1533 VectorType *VecTy) const;
1534
1535 /// Flags describing the kind of vector reduction.
1536 struct ReductionFlags {
1537 ReductionFlags() = default;
1538 bool IsMaxOp =
1539 false; ///< If the op is a min/max kind, true if it's a max operation.
1540 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1541 bool NoNaN =
1542 false; ///< If op is an fp min/max, whether NaNs may be present.
1543 };
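  // Illustrative sketch (assumption): describing a plain signed integer add
  // reduction when querying the in-loop reduction preference. `TTI` and
  // `I32Ty` are assumed.
  //
  //   TargetTransformInfo::ReductionFlags Flags; // not a min/max reduction
  //   Flags.IsSigned = true;
  //   bool InLoop = TTI.preferInLoopReduction(Instruction::Add, I32Ty, Flags);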
1544
1545 /// \returns True if the target prefers reductions to be kept in the loop.
1546 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1547 ReductionFlags Flags) const;
1548
1549 /// \returns True if the target prefers the reduction select kept in the loop
1550 /// when tail-folding, i.e.:
1551 /// loop:
1552 /// p = phi (0, s)
1553 /// a = add (p, x)
1554 /// s = select (mask, a, p)
1555 /// vecreduce.add(s)
1556 ///
1557 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1558 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1559 /// by the target, this can lead to cleaner code generation.
1560 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1561 ReductionFlags Flags) const;
1562
1563 /// Return true if the loop vectorizer should consider vectorizing an
1564 /// otherwise scalar epilogue loop.
1565 bool preferEpilogueVectorization() const;
1566
1567 /// \returns True if the target wants to expand the given reduction intrinsic
1568 /// into a shuffle sequence.
1569 bool shouldExpandReduction(const IntrinsicInst *II) const;
1570
1571 /// \returns the size cost of rematerializing a GlobalValue address relative
1572 /// to a stack reload.
1573 unsigned getGISelRematGlobalCost() const;
1574
1575 /// \returns the lower bound of a trip count to decide on vectorization
1576 /// while tail-folding.
1577 unsigned getMinTripCountTailFoldingThreshold() const;
1578
1579 /// \returns True if the target supports scalable vectors.
1580 bool supportsScalableVectors() const;
1581
1582 /// \return true when scalable vectorization is preferred.
1583 bool enableScalableVectorization() const;
1584
1585 /// \name Vector Predication Information
1586 /// @{
1587 /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1588 /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1589 /// Reference - "Vector Predication Intrinsics").
1590 /// Use of %evl is discouraged when that is not the case.
1591 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1592 Align Alignment) const;
1593
1594 struct VPLegalization {
1595 enum VPTransform {
1596 // keep the predicating parameter
1597 Legal = 0,
1598 // where legal, discard the predicate parameter
1599 Discard = 1,
1600 // transform into something else that is also predicating
1601 Convert = 2
1602 };
1603
1604 // How to transform the EVL parameter.
1605 // Legal: keep the EVL parameter as it is.
1606 // Discard: Ignore the EVL parameter where it is safe to do so.
1607 // Convert: Fold the EVL into the mask parameter.
1608 VPTransform EVLParamStrategy;
1609
1610 // How to transform the operator.
1611 // Legal: The target supports this operator.
1612 // Convert: Convert this to a non-VP operation.
1613 // The 'Discard' strategy is invalid.
1614 VPTransform OpStrategy;
1615
1616 bool shouldDoNothing() const {
1617 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1618 }
1619 VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1620 : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1621 };
1622
1623 /// \returns How the target needs this vector-predicated operation to be
1624 /// transformed.
1625 VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1626 /// @}
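  // A minimal usage sketch (assumption): `VPI` is a VPIntrinsic visited by an
  // IR-level expansion pass.
  //
  //   TargetTransformInfo::VPLegalization VPL =
  //       TTI.getVPLegalizationStrategy(VPI);
  //   if (VPL.shouldDoNothing())
  //     return; // the hardware handles %evl and the mask directly
  //   // otherwise fold or discard %evl and, if required, lower to a non-VP
  //   // operation according to VPL.EVLParamStrategy and VPL.OpStrategy.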
1627
1628 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1629 /// state.
1630 ///
1631 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1632 /// node containing a jump table in a format suitable for the target, so it
1633 /// needs to know what format of jump table it can legally use.
1634 ///
1635 /// For non-Arm targets, this function isn't used. It defaults to returning
1636 /// false, but it shouldn't matter what it returns anyway.
1637 bool hasArmWideBranch(bool Thumb) const;
1638
1639 /// @}
1640
1641private:
1642 /// The abstract base class used to type erase specific TTI
1643 /// implementations.
1644 class Concept;
1645
1646 /// The template model for the base class which wraps a concrete
1647 /// implementation in a type erased interface.
1648 template <typename T> class Model;
1649
1650 std::unique_ptr<Concept> TTIImpl;
1651};
1652
1653 class TargetTransformInfo::Concept {
1654 public:
1655 virtual ~Concept() = 0;
1656 virtual const DataLayout &getDataLayout() const = 0;
1657 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1658 ArrayRef<const Value *> Operands,
1659 TTI::TargetCostKind CostKind) = 0;
1660 virtual InstructionCost
1661 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
1662 const PointersChainInfo &Info,
1663 TargetCostKind CostKind) = 0;
1664 virtual unsigned getInliningThresholdMultiplier() = 0;
1665 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1666 virtual int getInlinerVectorBonusPercent() = 0;
1667 virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1668 virtual unsigned
1669 getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1670 ProfileSummaryInfo *PSI,
1671 BlockFrequencyInfo *BFI) = 0;
1672 virtual InstructionCost getInstructionCost(const User *U,
1673 ArrayRef<const Value *> Operands,
1674 TargetCostKind CostKind) = 0;
1675 virtual BranchProbability getPredictableBranchThreshold() = 0;
1676 virtual bool hasBranchDivergence() = 0;
1677 virtual bool useGPUDivergenceAnalysis() = 0;
1678 virtual bool isSourceOfDivergence(const Value *V) = 0;
1679 virtual bool isAlwaysUniform(const Value *V) = 0;
1680 virtual unsigned getFlatAddressSpace() = 0;
1682 Intrinsic::ID IID) const = 0;
1683 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1684 virtual bool
1686 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1687 virtual bool isSingleThreaded() const = 0;
1688 virtual std::pair<const Value *, unsigned>
1689 getPredicatedAddrSpace(const Value *V) const = 0;
1691 Value *OldV,
1692 Value *NewV) const = 0;
1693 virtual bool isLoweredToCall(const Function *F) = 0;
1696 OptimizationRemarkEmitter *ORE) = 0;
1698 PeelingPreferences &PP) = 0;
1700 AssumptionCache &AC,
1701 TargetLibraryInfo *LibInfo,
1702 HardwareLoopInfo &HWLoopInfo) = 0;
1703 virtual bool
1707 InterleavedAccessInfo *IAI) = 0;
1708 virtual TailFoldingStyle
1709 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1710 virtual std::optional<Instruction *> instCombineIntrinsic(
1711 InstCombiner &IC, IntrinsicInst &II) = 0;
1712 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1713 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1714 KnownBits & Known, bool &KnownBitsComputed) = 0;
1715 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1716 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1717 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1718 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1719 SimplifyAndSetOp) = 0;
1720 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1721 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1722 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1723 int64_t BaseOffset, bool HasBaseReg,
1724 int64_t Scale, unsigned AddrSpace,
1725 Instruction *I) = 0;
1727 const TargetTransformInfo::LSRCost &C2) = 0;
1728 virtual bool isNumRegsMajorCostOfLSR() = 0;
1730 virtual bool canMacroFuseCmp() = 0;
1731 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1733 TargetLibraryInfo *LibInfo) = 0;
1734 virtual AddressingModeKind
1736 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1737 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1738 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1739 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1740 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1741 ElementCount NumElements) const = 0;
1742 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1743 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1745 Align Alignment) = 0;
1747 Align Alignment) = 0;
1748 virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1749 virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1750 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1751 unsigned Opcode1,
1752 const SmallBitVector &OpcodeMask) const = 0;
1753 virtual bool enableOrderedReductions() = 0;
1754 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1755 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1758 int64_t BaseOffset,
1759 bool HasBaseReg, int64_t Scale,
1760 unsigned AddrSpace) = 0;
1761 virtual bool LSRWithInstrQueries() = 0;
1762 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1764 virtual bool useAA() = 0;
1765 virtual bool isTypeLegal(Type *Ty) = 0;
1766 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1767 virtual bool shouldBuildLookupTables() = 0;
1769 virtual bool shouldBuildRelLookupTables() = 0;
1770 virtual bool useColdCCForColdCall(Function &F) = 0;
1772 const APInt &DemandedElts,
1773 bool Insert, bool Extract,
1775 virtual InstructionCost
1777 ArrayRef<Type *> Tys,
1780 virtual bool supportsTailCalls() = 0;
1781 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1782 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1784 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1785 virtual bool enableSelectOptimize() = 0;
1790 unsigned BitWidth,
1791 unsigned AddressSpace,
1792 Align Alignment,
1793 unsigned *Fast) = 0;
1794 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1795 virtual bool haveFastSqrt(Type *Ty) = 0;
1797 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1799 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1800 const APInt &Imm, Type *Ty) = 0;
1801 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1803 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1804 const APInt &Imm, Type *Ty,
1806 Instruction *Inst = nullptr) = 0;
1808 const APInt &Imm, Type *Ty,
1810 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1811 virtual unsigned getRegisterClassForType(bool Vector,
1812 Type *Ty = nullptr) const = 0;
1813 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1815 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1816 virtual std::optional<unsigned> getMaxVScale() const = 0;
1817 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1818 virtual bool
1820 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1821 bool IsScalable) const = 0;
1822 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1823 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1824 Type *ScalarValTy) const = 0;
1826 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1827 virtual unsigned getCacheLineSize() const = 0;
1828 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1829 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1830 const = 0;
1831
1832 /// \return How much before a load we should place the prefetch
1833 /// instruction. This is currently measured in number of
1834 /// instructions.
1835 virtual unsigned getPrefetchDistance() const = 0;
1836
1837 /// \return Some HW prefetchers can handle accesses up to a certain
1838 /// constant stride. This is the minimum stride in bytes where it
1839 /// makes sense to start adding SW prefetches. The default is 1,
1840 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1841 /// even below the HW prefetcher limit, and the arguments provided are
1842 /// meant to serve as a basis for deciding this for a particular loop.
1843 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1844 unsigned NumStridedMemAccesses,
1845 unsigned NumPrefetches,
1846 bool HasCall) const = 0;
1847
1848 /// \return The maximum number of iterations to prefetch ahead. If
1849 /// the required number of iterations is more than this number, no
1850 /// prefetching is performed.
1851 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1852
1853 /// \return True if prefetching should also be done for writes.
1854 virtual bool enableWritePrefetching() const = 0;
1855
1856 /// \return if target want to issue a prefetch in address space \p AS.
1857 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1858
1859 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
1861 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1862 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
1863 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1864
1866 ArrayRef<int> Mask,
1868 int Index, VectorType *SubTp,
1869 ArrayRef<const Value *> Args) = 0;
1870 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1871 Type *Src, CastContextHint CCH,
1873 const Instruction *I) = 0;
1874 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1875 VectorType *VecTy,
1876 unsigned Index) = 0;
1877 virtual InstructionCost getCFInstrCost(unsigned Opcode,
1879 const Instruction *I = nullptr) = 0;
1880 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1881 Type *CondTy,
1882 CmpInst::Predicate VecPred,
1884 const Instruction *I) = 0;
1885 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1887 unsigned Index, Value *Op0,
1888 Value *Op1) = 0;
1891 unsigned Index) = 0;
1892
1893 virtual InstructionCost
1894 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1895 const APInt &DemandedDstElts,
1897
1898 virtual InstructionCost
1899 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1901 OperandValueInfo OpInfo, const Instruction *I) = 0;
1902 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1903 Align Alignment,
1904 unsigned AddressSpace,
1906 const Instruction *I) = 0;
1907 virtual InstructionCost
1908 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1909 unsigned AddressSpace,
1911 virtual InstructionCost
1912 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1913 bool VariableMask, Align Alignment,
1915 const Instruction *I = nullptr) = 0;
1916
1918 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1919 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1920 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1921 virtual InstructionCost
1923 std::optional<FastMathFlags> FMF,
1925 virtual InstructionCost
1926 getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1929 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1930 std::optional<FastMathFlags> FMF,
1933 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1935 virtual InstructionCost
1939 ArrayRef<Type *> Tys,
1941 virtual unsigned getNumberOfParts(Type *Tp) = 0;
1942 virtual InstructionCost
1944 virtual InstructionCost
1947 MemIntrinsicInfo &Info) = 0;
1948 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1950 Type *ExpectedType) = 0;
1952 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1953 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1954 std::optional<uint32_t> AtomicElementSize) const = 0;
1955
1957 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1958 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1959 unsigned SrcAlign, unsigned DestAlign,
1960 std::optional<uint32_t> AtomicCpySize) const = 0;
1961 virtual bool areInlineCompatible(const Function *Caller,
1962 const Function *Callee) const = 0;
1963 virtual bool areTypesABICompatible(const Function *Caller,
1964 const Function *Callee,
1965 const ArrayRef<Type *> &Types) const = 0;
1966 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1967 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1968 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1969 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1970 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1971 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1972 Align Alignment,
1973 unsigned AddrSpace) const = 0;
1974 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1975 Align Alignment,
1976 unsigned AddrSpace) const = 0;
1978 ElementCount VF) const = 0;
1979 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1980 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1981 unsigned ChainSizeInBytes,
1982 VectorType *VecTy) const = 0;
1983 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1984 unsigned ChainSizeInBytes,
1985 VectorType *VecTy) const = 0;
1986 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1987 ReductionFlags) const = 0;
1988 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1989 ReductionFlags) const = 0;
1990 virtual bool preferEpilogueVectorization() const = 0;
1991
1992 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1993 virtual unsigned getGISelRematGlobalCost() const = 0;
1994 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
1995 virtual bool enableScalableVectorization() const = 0;
1996 virtual bool supportsScalableVectors() const = 0;
1997 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1998 Align Alignment) const = 0;
1999 virtual VPLegalization
2001 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2002};
2003
2004template <typename T>
2005class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2006 T Impl;
2007
2008public:
2009 Model(T Impl) : Impl(std::move(Impl)) {}
2010 ~Model() override = default;
2011
2012 const DataLayout &getDataLayout() const override {
2013 return Impl.getDataLayout();
2014 }
2015
2016 InstructionCost
2017 getGEPCost(Type *PointeeType, const Value *Ptr,
2018 ArrayRef<const Value *> Operands,
2020 return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
2021 }
2022 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2023 const Value *Base,
2024 const PointersChainInfo &Info,
2025 TargetCostKind CostKind) override {
2026 return Impl.getPointersChainCost(Ptrs, Base, Info, CostKind);
2027 }
2028 unsigned getInliningThresholdMultiplier() override {
2029 return Impl.getInliningThresholdMultiplier();
2030 }
2031 unsigned adjustInliningThreshold(const CallBase *CB) override {
2032 return Impl.adjustInliningThreshold(CB);
2033 }
2034 int getInlinerVectorBonusPercent() override {
2035 return Impl.getInlinerVectorBonusPercent();
2036 }
2037 InstructionCost getMemcpyCost(const Instruction *I) override {
2038 return Impl.getMemcpyCost(I);
2039 }
2040 InstructionCost getInstructionCost(const User *U,
2041 ArrayRef<const Value *> Operands,
2042 TargetCostKind CostKind) override {
2043 return Impl.getInstructionCost(U, Operands, CostKind);
2044 }
2045 BranchProbability getPredictableBranchThreshold() override {
2046 return Impl.getPredictableBranchThreshold();
2047 }
2048 bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
2049 bool useGPUDivergenceAnalysis() override {
2050 return Impl.useGPUDivergenceAnalysis();
2051 }
2052 bool isSourceOfDivergence(const Value *V) override {
2053 return Impl.isSourceOfDivergence(V);
2054 }
2055
2056 bool isAlwaysUniform(const Value *V) override {
2057 return Impl.isAlwaysUniform(V);
2058 }
2059
2060 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2061
2062 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2063 Intrinsic::ID IID) const override {
2064 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2065 }
2066
2067 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2068 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2069 }
2070
2071 bool
2072 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2073 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2074 }
2075
2076 unsigned getAssumedAddrSpace(const Value *V) const override {
2077 return Impl.getAssumedAddrSpace(V);
2078 }
2079
2080 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2081
2082 std::pair<const Value *, unsigned>
2083 getPredicatedAddrSpace(const Value *V) const override {
2084 return Impl.getPredicatedAddrSpace(V);
2085 }
2086
2087 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2088 Value *NewV) const override {
2089 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2090 }
2091
2092 bool isLoweredToCall(const Function *F) override {
2093 return Impl.isLoweredToCall(F);
2094 }
2095 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2096 UnrollingPreferences &UP,
2097 OptimizationRemarkEmitter *ORE) override {
2098 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2099 }
2100 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2101 PeelingPreferences &PP) override {
2102 return Impl.getPeelingPreferences(L, SE, PP);
2103 }
2104 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2105 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2106 HardwareLoopInfo &HWLoopInfo) override {
2107 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2108 }
2109 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
2110 AssumptionCache &AC, TargetLibraryInfo *TLI,
2111 DominatorTree *DT,
2112 LoopVectorizationLegality *LVL,
2113 InterleavedAccessInfo *IAI) override {
2114 return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
2115 }
2117 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2118 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2119 }
2120 std::optional<Instruction *>
2121 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2122 return Impl.instCombineIntrinsic(IC, II);
2123 }
2124 std::optional<Value *>
2125 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2126 APInt DemandedMask, KnownBits &Known,
2127 bool &KnownBitsComputed) override {
2128 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2129 KnownBitsComputed);
2130 }
2131 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2132 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2133 APInt &UndefElts2, APInt &UndefElts3,
2134 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2135 SimplifyAndSetOp) override {
2136 return Impl.simplifyDemandedVectorEltsIntrinsic(
2137 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2138 SimplifyAndSetOp);
2139 }
2140 bool isLegalAddImmediate(int64_t Imm) override {
2141 return Impl.isLegalAddImmediate(Imm);
2142 }
2143 bool isLegalICmpImmediate(int64_t Imm) override {
2144 return Impl.isLegalICmpImmediate(Imm);
2145 }
2146 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2147 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2148 Instruction *I) override {
2149 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2150 AddrSpace, I);
2151 }
2152 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2153 const TargetTransformInfo::LSRCost &C2) override {
2154 return Impl.isLSRCostLess(C1, C2);
2155 }
2156 bool isNumRegsMajorCostOfLSR() override {
2157 return Impl.isNumRegsMajorCostOfLSR();
2158 }
2159 bool isProfitableLSRChainElement(Instruction *I) override {
2160 return Impl.isProfitableLSRChainElement(I);
2161 }
2162 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2163 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2164 DominatorTree *DT, AssumptionCache *AC,
2165 TargetLibraryInfo *LibInfo) override {
2166 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2167 }
2169 getPreferredAddressingMode(const Loop *L,
2170 ScalarEvolution *SE) const override {
2171 return Impl.getPreferredAddressingMode(L, SE);
2172 }
2173 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2174 return Impl.isLegalMaskedStore(DataType, Alignment);
2175 }
2176 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2177 return Impl.isLegalMaskedLoad(DataType, Alignment);
2178 }
2179 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2180 return Impl.isLegalNTStore(DataType, Alignment);
2181 }
2182 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2183 return Impl.isLegalNTLoad(DataType, Alignment);
2184 }
2185 bool isLegalBroadcastLoad(Type *ElementTy,
2186 ElementCount NumElements) const override {
2187 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2188 }
2189 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2190 return Impl.isLegalMaskedScatter(DataType, Alignment);
2191 }
2192 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2193 return Impl.isLegalMaskedGather(DataType, Alignment);
2194 }
2195 bool forceScalarizeMaskedGather(VectorType *DataType,
2196 Align Alignment) override {
2197 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2198 }
2199 bool forceScalarizeMaskedScatter(VectorType *DataType,
2200 Align Alignment) override {
2201 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2202 }
2203 bool isLegalMaskedCompressStore(Type *DataType) override {
2204 return Impl.isLegalMaskedCompressStore(DataType);
2205 }
2206 bool isLegalMaskedExpandLoad(Type *DataType) override {
2207 return Impl.isLegalMaskedExpandLoad(DataType);
2208 }
2209 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2210 const SmallBitVector &OpcodeMask) const override {
2211 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2212 }
2213 bool enableOrderedReductions() override {
2214 return Impl.enableOrderedReductions();
2215 }
2216 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2217 return Impl.hasDivRemOp(DataType, IsSigned);
2218 }
2219 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2220 return Impl.hasVolatileVariant(I, AddrSpace);
2221 }
2222 bool prefersVectorizedAddressing() override {
2223 return Impl.prefersVectorizedAddressing();
2224 }
2225 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2226 int64_t BaseOffset, bool HasBaseReg,
2227 int64_t Scale,
2228 unsigned AddrSpace) override {
2229 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2230 AddrSpace);
2231 }
2232 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2233 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2234 return Impl.isTruncateFree(Ty1, Ty2);
2235 }
2236 bool isProfitableToHoist(Instruction *I) override {
2237 return Impl.isProfitableToHoist(I);
2238 }
2239 bool useAA() override { return Impl.useAA(); }
2240 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2241 unsigned getRegUsageForType(Type *Ty) override {
2242 return Impl.getRegUsageForType(Ty);
2243 }
2244 bool shouldBuildLookupTables() override {
2245 return Impl.shouldBuildLookupTables();
2246 }
2247 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2248 return Impl.shouldBuildLookupTablesForConstant(C);
2249 }
2250 bool shouldBuildRelLookupTables() override {
2251 return Impl.shouldBuildRelLookupTables();
2252 }
2253 bool useColdCCForColdCall(Function &F) override {
2254 return Impl.useColdCCForColdCall(F);
2255 }
2256
2257 InstructionCost getScalarizationOverhead(VectorType *Ty,
2258 const APInt &DemandedElts,
2259 bool Insert, bool Extract,
2260 TargetCostKind CostKind) override {
2261 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2262 CostKind);
2263 }
2264 InstructionCost
2265 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2266 ArrayRef<Type *> Tys,
2267 TargetCostKind CostKind) override {
2268 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2269 }
2270
2271 bool supportsEfficientVectorElementLoadStore() override {
2272 return Impl.supportsEfficientVectorElementLoadStore();
2273 }
2274
2275 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2276 bool supportsTailCallFor(const CallBase *CB) override {
2277 return Impl.supportsTailCallFor(CB);
2278 }
2279
2280 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2281 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2282 }
2283 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2284 bool IsZeroCmp) const override {
2285 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2286 }
2287 bool enableInterleavedAccessVectorization() override {
2288 return Impl.enableInterleavedAccessVectorization();
2289 }
2290 bool enableSelectOptimize() override {
2291 return Impl.enableSelectOptimize();
2292 }
2293 bool enableMaskedInterleavedAccessVectorization() override {
2294 return Impl.enableMaskedInterleavedAccessVectorization();
2295 }
2296 bool isFPVectorizationPotentiallyUnsafe() override {
2297 return Impl.isFPVectorizationPotentiallyUnsafe();
2298 }
2299 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2300 unsigned AddressSpace, Align Alignment,
2301 unsigned *Fast) override {
2302 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2303 Alignment, Fast);
2304 }
2305 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2306 return Impl.getPopcntSupport(IntTyWidthInBit);
2307 }
2308 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2309
2310 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2311 return Impl.isExpensiveToSpeculativelyExecute(I);
2312 }
2313
2314 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2315 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2316 }
2317
2318 InstructionCost getFPOpCost(Type *Ty) override {
2319 return Impl.getFPOpCost(Ty);
2320 }
2321
2322 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2323 const APInt &Imm, Type *Ty) override {
2324 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2325 }
2326 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2327 TargetCostKind CostKind) override {
2328 return Impl.getIntImmCost(Imm, Ty, CostKind);
2329 }
2330 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2331 const APInt &Imm, Type *Ty,
2333 Instruction *Inst = nullptr) override {
2334 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2335 }
2336 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2337 const APInt &Imm, Type *Ty,
2338 TargetCostKind CostKind) override {
2339 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2340 }
2341 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2342 return Impl.getNumberOfRegisters(ClassID);
2343 }
2344 unsigned getRegisterClassForType(bool Vector,
2345 Type *Ty = nullptr) const override {
2346 return Impl.getRegisterClassForType(Vector, Ty);
2347 }
2348 const char *getRegisterClassName(unsigned ClassID) const override {
2349 return Impl.getRegisterClassName(ClassID);
2350 }
2351 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2352 return Impl.getRegisterBitWidth(K);
2353 }
2354 unsigned getMinVectorRegisterBitWidth() const override {
2355 return Impl.getMinVectorRegisterBitWidth();
2356 }
2357 std::optional<unsigned> getMaxVScale() const override {
2358 return Impl.getMaxVScale();
2359 }
2360 std::optional<unsigned> getVScaleForTuning() const override {
2361 return Impl.getVScaleForTuning();
2362 }
2363 bool shouldMaximizeVectorBandwidth(
2364 TargetTransformInfo::RegisterKind K) const override {
2365 return Impl.shouldMaximizeVectorBandwidth(K);
2366 }
2367 ElementCount getMinimumVF(unsigned ElemWidth,
2368 bool IsScalable) const override {
2369 return Impl.getMinimumVF(ElemWidth, IsScalable);
2370 }
2371 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2372 return Impl.getMaximumVF(ElemWidth, Opcode);
2373 }
2374 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2375 Type *ScalarValTy) const override {
2376 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2377 }
2378 bool shouldConsiderAddressTypePromotion(
2379 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2380 return Impl.shouldConsiderAddressTypePromotion(
2381 I, AllowPromotionWithoutCommonHeader);
2382 }
2383 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2384 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2385 return Impl.getCacheSize(Level);
2386 }
2387 std::optional<unsigned>
2388 getCacheAssociativity(CacheLevel Level) const override {
2389 return Impl.getCacheAssociativity(Level);
2390 }
2391
2392 /// Return the preferred prefetch distance in terms of instructions.
2393 ///
2394 unsigned getPrefetchDistance() const override {
2395 return Impl.getPrefetchDistance();
2396 }
2397
2398 /// Return the minimum stride necessary to trigger software
2399 /// prefetching.
2400 ///
2401 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2402 unsigned NumStridedMemAccesses,
2403 unsigned NumPrefetches,
2404 bool HasCall) const override {
2405 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2406 NumPrefetches, HasCall);
2407 }
2408
2409 /// Return the maximum prefetch distance in terms of loop
2410 /// iterations.
2411 ///
2412 unsigned getMaxPrefetchIterationsAhead() const override {
2413 return Impl.getMaxPrefetchIterationsAhead();
2414 }
2415
2416 /// \return True if prefetching should also be done for writes.
2417 bool enableWritePrefetching() const override {
2418 return Impl.enableWritePrefetching();
2419 }
2420
2421 /// \return if target want to issue a prefetch in address space \p AS.
2422 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2423 return Impl.shouldPrefetchAddressSpace(AS);
2424 }
2425
2426 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2427 return Impl.getMaxInterleaveFactor(VF);
2428 }
2429 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2430 unsigned &JTSize,
2431 ProfileSummaryInfo *PSI,
2432 BlockFrequencyInfo *BFI) override {
2433 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2434 }
2435 InstructionCost getArithmeticInstrCost(
2436 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2437 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2438 ArrayRef<const Value *> Args,
2439 const Instruction *CxtI = nullptr) override {
2440 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2441 Args, CxtI);
2442 }
2443
2444 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2445 ArrayRef<int> Mask,
2447 VectorType *SubTp,
2448 ArrayRef<const Value *> Args) override {
2449 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2450 }
2451 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2452 CastContextHint CCH,
2454 const Instruction *I) override {
2455 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2456 }
2457 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2458 VectorType *VecTy,
2459 unsigned Index) override {
2460 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2461 }
2462 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2463 const Instruction *I = nullptr) override {
2464 return Impl.getCFInstrCost(Opcode, CostKind, I);
2465 }
2466 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2467 CmpInst::Predicate VecPred,
2469 const Instruction *I) override {
2470 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2471 }
2472 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2474 unsigned Index, Value *Op0,
2475 Value *Op1) override {
2476 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2477 }
2478 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2480 unsigned Index) override {
2481 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2482 }
2483 InstructionCost
2484 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2485 const APInt &DemandedDstElts,
2486 TTI::TargetCostKind CostKind) override {
2487 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2488 DemandedDstElts, CostKind);
2489 }
2490 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2491 unsigned AddressSpace,
2493 OperandValueInfo OpInfo,
2494 const Instruction *I) override {
2495 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2496 OpInfo, I);
2497 }
2498 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2499 unsigned AddressSpace,
2501 const Instruction *I) override {
2502 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2503 CostKind, I);
2504 }
2505 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2506 Align Alignment, unsigned AddressSpace,
2507 TTI::TargetCostKind CostKind) override {
2508 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2509 CostKind);
2510 }
2511 InstructionCost
2512 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2513 bool VariableMask, Align Alignment,
2515 const Instruction *I = nullptr) override {
2516 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2517 Alignment, CostKind, I);
2518 }
2519 InstructionCost getInterleavedMemoryOpCost(
2520 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2521 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2522 bool UseMaskForCond, bool UseMaskForGaps) override {
2523 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2524 Alignment, AddressSpace, CostKind,
2525 UseMaskForCond, UseMaskForGaps);
2526 }
2527 InstructionCost
2528 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2529 std::optional<FastMathFlags> FMF,
2530 TTI::TargetCostKind CostKind) override {
2531 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2532 }
2533 InstructionCost
2534 getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2535 TTI::TargetCostKind CostKind) override {
2536 return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2537 }
2538 InstructionCost getExtendedReductionCost(
2539 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2540 std::optional<FastMathFlags> FMF,
2542 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2543 CostKind);
2544 }
2545 InstructionCost getMulAccReductionCost(
2546 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2548 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2549 }
2550 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2551 TTI::TargetCostKind CostKind) override {
2552 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2553 }
2554 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2555 ArrayRef<Type *> Tys,
2556 TTI::TargetCostKind CostKind) override {
2557 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2558 }
2559 unsigned getNumberOfParts(Type *Tp) override {
2560 return Impl.getNumberOfParts(Tp);
2561 }
2562 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2563 const SCEV *Ptr) override {
2564 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2565 }
2566 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2567 return Impl.getCostOfKeepingLiveOverCall(Tys);
2568 }
2569 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2570 MemIntrinsicInfo &Info) override {
2571 return Impl.getTgtMemIntrinsic(Inst, Info);
2572 }
2573 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2574 return Impl.getAtomicMemIntrinsicMaxElementSize();
2575 }
2576 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2577 Type *ExpectedType) override {
2578 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2579 }
2580 Type *getMemcpyLoopLoweringType(
2581 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2582 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2583 std::optional<uint32_t> AtomicElementSize) const override {
2584 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2585 DestAddrSpace, SrcAlign, DestAlign,
2586 AtomicElementSize);
2587 }
2588 void getMemcpyLoopResidualLoweringType(
2589 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2590 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2591 unsigned SrcAlign, unsigned DestAlign,
2592 std::optional<uint32_t> AtomicCpySize) const override {
2593 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2594 SrcAddrSpace, DestAddrSpace,
2595 SrcAlign, DestAlign, AtomicCpySize);
2596 }
2597 bool areInlineCompatible(const Function *Caller,
2598 const Function *Callee) const override {
2599 return Impl.areInlineCompatible(Caller, Callee);
2600 }
2601 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2602 const ArrayRef<Type *> &Types) const override {
2603 return Impl.areTypesABICompatible(Caller, Callee, Types);
2604 }
2605 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2606 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2607 }
2608 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2609 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2610 }
2611 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2612 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2613 }
2614 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2615 return Impl.isLegalToVectorizeLoad(LI);
2616 }
2617 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2618 return Impl.isLegalToVectorizeStore(SI);
2619 }
2620 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2621 unsigned AddrSpace) const override {
2622 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2623 AddrSpace);
2624 }
2625 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2626 unsigned AddrSpace) const override {
2627 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2628 AddrSpace);
2629 }
2630 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2631 ElementCount VF) const override {
2632 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2633 }
2634 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2635 return Impl.isElementTypeLegalForScalableVector(Ty);
2636 }
2637 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2638 unsigned ChainSizeInBytes,
2639 VectorType *VecTy) const override {
2640 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2641 }
2642 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2643 unsigned ChainSizeInBytes,
2644 VectorType *VecTy) const override {
2645 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2646 }
2647 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2648 ReductionFlags Flags) const override {
2649 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2650 }
2651 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2652 ReductionFlags Flags) const override {
2653 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2654 }
2655 bool preferEpilogueVectorization() const override {
2656 return Impl.preferEpilogueVectorization();
2657 }
2658
2659 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2660 return Impl.shouldExpandReduction(II);
2661 }
2662
2663 unsigned getGISelRematGlobalCost() const override {
2664 return Impl.getGISelRematGlobalCost();
2665 }
2666
2667 unsigned getMinTripCountTailFoldingThreshold() const override {
2668 return Impl.getMinTripCountTailFoldingThreshold();
2669 }
2670
2671 bool supportsScalableVectors() const override {
2672 return Impl.supportsScalableVectors();
2673 }
2674
2675 bool enableScalableVectorization() const override {
2676 return Impl.enableScalableVectorization();
2677 }
2678
2679 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2680 Align Alignment) const override {
2681 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2682 }
2683
2685 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2686 return Impl.getVPLegalizationStrategy(PI);
2687 }
2688
2689 bool hasArmWideBranch(bool Thumb) const override {
2690 return Impl.hasArmWideBranch(Thumb);
2691 }
2692};
2693
2694template <typename T>
2695 TargetTransformInfo::TargetTransformInfo(T Impl)
2696 : TTIImpl(new Model<T>(Impl)) {}
2697
2698/// Analysis pass providing the \c TargetTransformInfo.
2699///
2700/// The core idea of the TargetIRAnalysis is to expose an interface through
2701/// which LLVM targets can analyze and provide information about the middle
2702/// end's target-independent IR. This supports use cases such as target-aware
2703/// cost modeling of IR constructs.
2704///
2705/// This is a function analysis because much of the cost modeling for targets
2706/// is done in a subtarget specific way and LLVM supports compiling different
2707/// functions targeting different subtargets in order to support runtime
2708/// dispatch according to the observed subtarget.
2709class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2710public:
2711 typedef TargetTransformInfo Result;
2712
2713 /// Default construct a target IR analysis.
2714 ///
2715 /// This will use the module's datalayout to construct a baseline
2716 /// conservative TTI result.
2717 TargetIRAnalysis();
2718
2719 /// Construct an IR analysis pass around a target-provide callback.
2720 ///
2721 /// The callback will be called with a particular function for which the TTI
2722 /// is needed and must return a TTI object for that function.
2723 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2724
2725 // Value semantics. We spell out the constructors for MSVC.
2726 TargetIRAnalysis(const TargetIRAnalysis &Arg)
2727 : TTICallback(Arg.TTICallback) {}
2728 TargetIRAnalysis(TargetIRAnalysis &&Arg)
2729 : TTICallback(std::move(Arg.TTICallback)) {}
2730 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2731 TTICallback = RHS.TTICallback;
2732 return *this;
2733 }
2734 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2735 TTICallback = std::move(RHS.TTICallback);
2736 return *this;
2737 }
2738
2739 Result run(const Function &F, FunctionAnalysisManager &);
2740
2741private:
2742 friend AnalysisInfoMixin<TargetIRAnalysis>;
2743 static AnalysisKey Key;
2744
2745 /// The callback used to produce a result.
2746 ///
2747 /// We use a completely opaque callback so that targets can provide whatever
2748 /// mechanism they desire for constructing the TTI for a given function.
2749 ///
2750 /// FIXME: Should we really use std::function? It's relatively inefficient.
2751 /// It might be possible to arrange for even stateful callbacks to outlive
2752 /// the analysis and thus use a function_ref which would be lighter weight.
2753 /// This may also be less error prone as the callback is likely to reference
2754 /// the external TargetMachine, and that reference needs to never dangle.
2755 std::function<Result(const Function &)> TTICallback;
2756
2757 /// Helper function used as the callback in the default constructor.
2758 static Result getDefaultTTI(const Function &F);
2759};
2760
2761/// Wrapper pass for TargetTransformInfo.
2762///
2763/// This pass can be constructed from a TTI object which it stores internally
2764/// and is queried by passes.
2765 class TargetTransformInfoWrapperPass : public ImmutablePass {
2766 TargetIRAnalysis TIRA;
2767 std::optional<TargetTransformInfo> TTI;
2768
2769 virtual void anchor();
2770
2771public:
2772 static char ID;
2773
2774 /// We must provide a default constructor for the pass but it should
2775 /// never be used.
2776 ///
2777 /// Use the constructor below or call one of the creation routines.
2778 TargetTransformInfoWrapperPass();
2779
2780 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2781
2782 TargetTransformInfo &getTTI(const Function &F);
2783 };
2784
2785/// Create an analysis pass wrapper around a TTI object.
2786///
2787/// This analysis pass just holds the TTI instance and makes it available to
2788/// clients.
2789 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2790
2791} // namespace llvm
2792
2793#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
AMDGPU Lower Kernel Arguments
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This header defines various interfaces for pass management in LLVM.
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
This file implements the SmallBitVector class.
@ Flags
Definition: TextStubV5.cpp:93
Value * RHS
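
Usage note (legacy pass manager): the same information is reached through TargetTransformInfoWrapperPass, which a pass declares via addRequired<> and queries per function with getTTI(); createTargetTransformInfoWrapperPass() is what pipeline builders use to add the wrapper to a legacy PassManager. The pass LegacyTTIConsumer and its isLegalAddImmediate(42) query below are hypothetical, a sketch of the access pattern rather than a definitive implementation.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Hypothetical legacy pass that consults the target's cost model.
struct LegacyTTIConsumer : FunctionPass {
  static char ID;
  LegacyTTIConsumer() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The wrapper pass owns the per-target TargetTransformInfo and must be
    // requested explicitly.
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

    // Arbitrary example query: can the target fold the immediate 42 into an
    // add instruction?
    bool LegalImm = TTI.isLegalAddImmediate(42);
    (void)LegalImm;
    return false; // This sketch does not modify the IR.
  }
};
} // end anonymous namespace

char LegacyTTIConsumer::ID = 0;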