TargetTransformInfo.h
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/APInt.h"
26#include "llvm/IR/FMF.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
33#include <functional>
34#include <optional>
35#include <utility>
36
37namespace llvm {
38
39namespace Intrinsic {
40typedef unsigned ID;
41}
42
43class AllocaInst;
44class AssumptionCache;
45class BlockFrequencyInfo;
46class DominatorTree;
47class BranchInst;
48class CallBase;
49class Function;
50class GlobalValue;
51class InstCombiner;
52class OptimizationRemarkEmitter;
53class InterleavedAccessInfo;
54class IntrinsicInst;
55class LoadInst;
56class Loop;
57class LoopInfo;
58class LoopVectorizationLegality;
59class ProfileSummaryInfo;
60class RecurrenceDescriptor;
61class SCEV;
62class ScalarEvolution;
63class StoreInst;
64class SwitchInst;
65class TargetLibraryInfo;
66class Type;
67class User;
68class Value;
69class VPIntrinsic;
70struct KnownBits;
71
72/// Information about a load/store intrinsic defined by the target.
73struct MemIntrinsicInfo {
74 /// This is the pointer that the intrinsic is loading from or storing to.
75 /// If this is non-null, then analysis/optimization passes can assume that
76 /// this intrinsic is functionally equivalent to a load/store from this
77 /// pointer.
78 Value *PtrVal = nullptr;
79
80 // Ordering for atomic operations.
81 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
82
83 // Same Id is set by the target for corresponding load/store intrinsics.
84 unsigned short MatchingId = 0;
85
86 bool ReadMem = false;
87 bool WriteMem = false;
88 bool IsVolatile = false;
89
90 bool isUnordered() const {
91 return (Ordering == AtomicOrdering::NotAtomic ||
92 Ordering == AtomicOrdering::Unordered) &&
93 !IsVolatile;
94 }
95};
96
97/// Attributes of a target-dependent hardware loop.
98struct HardwareLoopInfo {
99 HardwareLoopInfo() = delete;
100 HardwareLoopInfo(Loop *L);
101 Loop *L = nullptr;
104 const SCEV *ExitCount = nullptr;
106 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
107 // value in every iteration.
108 bool IsNestingLegal = false; // Can a hardware loop be a parent to
109 // another hardware loop?
110 bool CounterInReg = false; // Should loop counter be updated in
111 // the loop via a phi?
112 bool PerformEntryTest = false; // Generate the intrinsic which also performs
113 // icmp ne zero on the loop counter value and
114 // produces an i1 to guard the loop entry.
116 DominatorTree &DT, bool ForceNestedLoop = false,
117 bool ForceHardwareLoopPHI = false);
118 bool canAnalyze(LoopInfo &LI);
119};
120
121class IntrinsicCostAttributes {
122 const IntrinsicInst *II = nullptr;
123 Type *RetTy = nullptr;
124 Intrinsic::ID IID;
125 SmallVector<Type *, 4> ParamTys;
127 FastMathFlags FMF;
128 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
129 // arguments and the return value will be computed based on types.
130 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
131
132public:
134 Intrinsic::ID Id, const CallBase &CI,
136 bool TypeBasedOnly = false);
137
139 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
140 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
142
145
149 const IntrinsicInst *I = nullptr,
151
152 Intrinsic::ID getID() const { return IID; }
153 const IntrinsicInst *getInst() const { return II; }
154 Type *getReturnType() const { return RetTy; }
155 FastMathFlags getFlags() const { return FMF; }
156 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
158 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
159
160 bool isTypeBasedOnly() const {
161 return Arguments.empty();
162 }
163
164 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
165};
166
167enum class TailFoldingStyle {
168 /// Don't use tail folding
169 None,
170 /// Use predicate only to mask operations on data in the loop.
171 /// When the VL is not known to be a power-of-2, this method requires a
172 /// runtime overflow check for the i + VL increment in the loop, because it
173 /// compares the scalar induction variable against the trip count rounded up
174 /// by VL, which may overflow. When the VL is a power-of-2, both the increment
175 /// and the rounded-up trip count overflow to 0, so no runtime check is
176 /// required: the loop is exited when the loop induction variable equals the
177 /// rounded-up trip count, which are both 0.
178 Data,
179 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
180 /// calculate the mask and instead implements this with a
181 /// splat/stepvector/cmp.
182 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
183 /// active.lane.mask intrinsic when it is not natively supported?
184 DataWithoutLaneMask,
185 /// Use predicate to control both data and control flow.
186 /// This method always requires a runtime overflow check for the i + VL
187 /// increment inside the loop, because it uses the result directly in the
188 /// active.lane.mask to calculate the mask for the next iteration. If the
189 /// increment overflows, the mask is no longer correct.
190 DataAndControlFlow,
191 /// Use predicate to control both data and control flow, but modify
192 /// the trip count so that a runtime overflow check can be avoided
193 /// and such that the scalar epilogue loop can always be removed.
194 DataAndControlFlowWithoutRuntimeCheck,
195 /// Use predicated EVL instructions for tail-folding.
196 /// Indicates that VP intrinsics should be used.
197 DataWithEVL,
198};
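// Worked example (illustrative, not part of this header): with a 32-bit IV,
// trip count TC = 0xFFFFFFFD and VL = 8 (a power of two), the trip count
// rounded up by VL is 0x100000000, which wraps to 0 in 32 bits. The induction
// variable also wraps from 0xFFFFFFF8 to 0 after the final iteration, so the
// exit compare "i == rounded-up TC" still fires and the Data style needs no
// runtime overflow check. With a non-power-of-two VL the two wrapped values
// need not coincide, which is why that case, and the DataAndControlFlow
// style, require a runtime overflow check.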
199
200struct TailFoldingInfo {
201 TargetLibraryInfo *TLI;
202 LoopVectorizationLegality *LVL;
203 InterleavedAccessInfo *IAI;
204 TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
205 InterleavedAccessInfo *IAI)
206 : TLI(TLI), LVL(LVL), IAI(IAI) {}
207};
208
209class TargetTransformInfo;
211
212/// This pass provides access to the codegen interfaces that are needed
213/// for IR-level transformations.
214class TargetTransformInfo {
215public:
216 /// Construct a TTI object using a type implementing the \c Concept
217 /// API below.
218 ///
219 /// This is used by targets to construct a TTI wrapping their target-specific
220 /// implementation that encodes appropriate costs for their target.
221 template <typename T> TargetTransformInfo(T Impl);
222
223 /// Construct a baseline TTI object using a minimal implementation of
224 /// the \c Concept API below.
225 ///
226 /// The TTI implementation will reflect the information in the DataLayout
227 /// provided if non-null.
228 explicit TargetTransformInfo(const DataLayout &DL);
229
230 // Provide move semantics.
233
234 // We need to define the destructor out-of-line to define our sub-classes
235 // out-of-line.
237
238 /// Handle the invalidation of this information.
239 ///
240 /// When used as a result of \c TargetIRAnalysis this method will be called
241 /// when the function this was computed for changes. When it returns false,
242 /// the information is preserved across those changes.
245 // FIXME: We should probably in some way ensure that the subtarget
246 // information for a function hasn't changed.
247 return false;
248 }
249
250 /// \name Generic Target Information
251 /// @{
252
253 /// The kind of cost model.
254 ///
255 /// There are several different cost models that can be customized by the
256 /// target. The normalization of each cost model may be target specific.
257 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
258 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
259 enum TargetCostKind {
260 TCK_RecipThroughput, ///< Reciprocal throughput.
261 TCK_Latency, ///< The latency of instruction.
262 TCK_CodeSize, ///< Instruction code size.
263 TCK_SizeAndLatency ///< The weighted sum of size and latency.
264 };
265
266 /// Underlying constants for 'cost' values in this interface.
267 ///
268 /// Many APIs in this interface return a cost. This enum defines the
269 /// fundamental values that should be used to interpret (and produce) those
270 /// costs. The costs are returned as an int rather than a member of this
271 /// enumeration because it is expected that the cost of one IR instruction
272 /// may have a multiplicative factor to it or otherwise won't fit directly
273 /// into the enum. Moreover, it is common to sum or average costs which works
274 /// better as simple integral values. Thus this enum only provides constants.
275 /// Also note that the returned costs are signed integers to make it natural
276 /// to add, subtract, and test with zero (a common boundary condition). It is
277 /// not expected that 2^32 is a realistic cost to be modeling at any point.
278 ///
279 /// Note that these costs should usually reflect the intersection of code-size
280 /// cost and execution cost. A free instruction is typically one that folds
281 /// into another instruction. For example, reg-to-reg moves can often be
282 /// skipped by renaming the registers in the CPU, but they still are encoded
283 /// and thus wouldn't be considered 'free' here.
284 enum TargetCostConstants {
285 TCC_Free = 0, ///< Expected to fold away in lowering.
286 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
287 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
288 };
289
290 /// Estimate the cost of a GEP operation when lowered.
291 ///
292 /// \p PointeeType is the source element type of the GEP.
293 /// \p Ptr is the base pointer operand.
294 /// \p Operands is the list of indices following the base pointer.
295 ///
296 /// \p AccessType is a hint as to what type of memory might be accessed by
297 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
298 /// folded into the addressing mode of a load/store. If AccessType is null,
299 /// then the resulting target type based off of PointeeType will be used as an
300 /// approximation.
301 InstructionCost
302 getGEPCost(Type *PointeeType, const Value *Ptr,
303 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
304 TargetCostKind CostKind = TCK_SizeAndLatency) const;
305
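// Usage sketch (illustrative only, not part of this header). It mirrors how
// passes such as SeparateConstOffsetFromGEP ask whether a GEP is expected to
// fold into its users' addressing modes.
static bool isGEPFoldable(GetElementPtrInst *GEP,
                          const TargetTransformInfo &TTI) {
  SmallVector<const Value *, 4> Indices(GEP->indices());
  return TTI.getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
                        Indices) == TargetTransformInfo::TCC_Free;
}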
306 /// Describe known properties for a set of pointers.
308 /// All the GEPs in a set have the same base address.
309 unsigned IsSameBaseAddress : 1;
310 /// These properties are only valid if IsSameBaseAddress is set.
311 /// True if all pointers are separated by a unit stride.
312 unsigned IsUnitStride : 1;
313 /// True if the distance between any two neighbouring pointers is a known value.
314 unsigned IsKnownStride : 1;
315 unsigned Reserved : 29;
316
317 bool isSameBase() const { return IsSameBaseAddress; }
318 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
320
322 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
323 /*IsKnownStride=*/1, 0};
324 }
326 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
327 /*IsKnownStride=*/1, 0};
328 }
330 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
331 /*IsKnownStride=*/0, 0};
332 }
333 };
334 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
335
336 /// Estimate the cost of a chain of pointers (typically pointer operands of a
337 /// chain of loads or stores within the same block) when lowered.
338 /// \p AccessTy is the type of the loads/stores that will ultimately use the
339 /// \p Ptrs.
340 InstructionCost
341 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
342 const PointersChainInfo &Info, Type *AccessTy,
344
345 ) const;
346
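// Usage sketch (illustrative only, not part of this header): cost of a
// unit-strided pointer chain feeding loads of AccessTy. The aggregate
// initializer mirrors the bit-field layout shown above.
static InstructionCost pointerChainCost(const TargetTransformInfo &TTI,
                                        ArrayRef<const Value *> Ptrs,
                                        const Value *BasePtr, Type *AccessTy) {
  TargetTransformInfo::PointersChainInfo Info = {/*IsSameBaseAddress=*/1,
                                                 /*IsUnitStride=*/1,
                                                 /*IsKnownStride=*/1,
                                                 /*Reserved=*/0};
  return TTI.getPointersChainCost(Ptrs, BasePtr, Info, AccessTy,
                                  TargetTransformInfo::TCK_RecipThroughput);
}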
347 /// \returns A value by which our inlining threshold should be multiplied.
348 /// This is primarily used to bump up the inlining threshold wholesale on
349 /// targets where calls are unusually expensive.
350 ///
351 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
352 /// individual classes of instructions would be better.
353 unsigned getInliningThresholdMultiplier() const;
354
357
358 /// \returns A value to be added to the inlining threshold.
359 unsigned adjustInliningThreshold(const CallBase *CB) const;
360
361 /// \returns The cost of having an Alloca in the caller if not inlined, to be
362 /// added to the threshold
363 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
364
365 /// \returns Vector bonus in percent.
366 ///
367 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
368 /// and apply this bonus based on the percentage of vector instructions. A
369 /// bonus is applied if the vector instructions exceed 50% and half that
370 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
371 /// arbitrary and evolved over time by accident as much as because they are
372 /// principled bonuses.
373 /// FIXME: It would be nice to base the bonus values on something more
374 /// scientific. A target may have no bonus on vector instructions.
376
377 /// \return the expected cost of a memcpy, which could e.g. depend on the
378 /// source/destination type and alignment and the number of bytes copied.
380
381 /// Returns the maximum memset / memcpy size in bytes that still makes it
382 /// profitable to inline the call.
384
385 /// \return The estimated number of case clusters when lowering \p 'SI'.
386 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
387 /// table.
389 unsigned &JTSize,
391 BlockFrequencyInfo *BFI) const;
392
393 /// Estimate the cost of a given IR user when lowered.
394 ///
395 /// This can estimate the cost of either a ConstantExpr or Instruction when
396 /// lowered.
397 ///
398 /// \p Operands is a list of operands which can be a result of transformations
399 /// of the current operands. The number of operands on the list must be
400 /// equal to the number of operands the IR user currently has, and their
401 /// order on the list must match the order of the IR user's current
402 /// operands.
403 ///
404 /// The returned cost is defined in terms of \c TargetCostConstants, see its
405 /// comments for a detailed explanation of the cost values.
406 InstructionCost getInstructionCost(const User *U,
407 ArrayRef<const Value *> Operands,
408 TargetCostKind CostKind) const;
409
410 /// This is a helper function which calls the three-argument
411 /// getInstructionCost with \p Operands which are the current operands U has.
412 InstructionCost getInstructionCost(const User *U,
413 TargetCostKind CostKind) const {
414 SmallVector<const Value *, 4> Operands(U->operand_values());
415 return getInstructionCost(U, Operands, CostKind);
416 }
417
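// Usage sketch (illustrative only, not part of this header): summing the
// size-and-latency cost of a basic block, the pattern used by clients such
// as the inliner and the loop unroller.
static InstructionCost blockCost(const BasicBlock &BB,
                                 const TargetTransformInfo &TTI) {
  InstructionCost Cost = 0;
  for (const Instruction &I : BB)
    Cost += TTI.getInstructionCost(&I,
                                   TargetTransformInfo::TCK_SizeAndLatency);
  return Cost;
}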
418 /// If a branch or a select condition is skewed in one direction by more than
419 /// this factor, it is very likely to be predicted correctly.
421
422 /// Returns estimated penalty of a branch misprediction in latency. Indicates
423 /// how aggressively the target wants unpredictable branches eliminated. A
424 /// zero return value means extra optimization applied to them should be
425 /// minimal.
427
428 /// Return true if branch divergence exists.
429 ///
430 /// Branch divergence has a significantly negative impact on GPU performance
431 /// when threads in the same wavefront take different paths due to conditional
432 /// branches.
433 ///
434 /// If \p F is passed, provides a context function. If \p F is known to only
435 /// execute in a single threaded environment, the target may choose to skip
436 /// uniformity analysis and assume all values are uniform.
437 bool hasBranchDivergence(const Function *F = nullptr) const;
438
439 /// Returns whether V is a source of divergence.
440 ///
441 /// This function provides the target-dependent information for
442 /// the target-independent UniformityAnalysis.
443 bool isSourceOfDivergence(const Value *V) const;
444
445 // Returns true for the target specific
446 // set of operations which produce uniform result
447 // even taking non-uniform arguments
448 bool isAlwaysUniform(const Value *V) const;
449
450 /// Query the target whether the specified address space cast from FromAS to
451 /// ToAS is valid.
452 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
453
454 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
455 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
456
457 /// Returns the address space ID for a target's 'flat' address space. Note
458 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
459 /// refers to as the generic address space. The flat address space is a
460 /// generic address space that can be used to access multiple segments of memory
461 /// with different address spaces. Access of a memory location through a
462 /// pointer with this address space is expected to be legal but slower
463 /// compared to the same memory location accessed through a pointer with a
464 /// different address space.
465 //
466 /// This is for targets with different pointer representations which can
467 /// be converted with the addrspacecast instruction. If a pointer is converted
468 /// to this address space, optimizations should attempt to replace the access
469 /// with the source address space.
470 ///
471 /// \returns ~0u if the target does not have such a flat address space to
472 /// optimize away.
473 unsigned getFlatAddressSpace() const;
474
475 /// Return any intrinsic address operand indexes which may be rewritten if
476 /// they use a flat address space pointer.
477 ///
478 /// \returns true if the intrinsic was handled.
480 Intrinsic::ID IID) const;
481
482 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
483
484 /// Return true if globals in this address space can have initializers other
485 /// than `undef`.
487
488 unsigned getAssumedAddrSpace(const Value *V) const;
489
490 bool isSingleThreaded() const;
491
492 std::pair<const Value *, unsigned>
493 getPredicatedAddrSpace(const Value *V) const;
494
495 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
496 /// NewV, which has a different address space. This should happen for every
497 /// operand index that collectFlatAddressOperands returned for the intrinsic.
498 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
499 /// new value (which may be the original \p II with modified operands).
501 Value *NewV) const;
502
503 /// Test whether calls to a function lower to actual program function
504 /// calls.
505 ///
506 /// The idea is to test whether the program is likely to require a 'call'
507 /// instruction or equivalent in order to call the given function.
508 ///
509 /// FIXME: It's not clear that this is a good or useful query API. Clients
510 /// should probably move to simpler cost metrics using the above.
511 /// Alternatively, we could split the cost interface into distinct code-size
512 /// and execution-speed costs. This would allow modelling the core of this
513 /// query more accurately as a call is a single small instruction, but
514 /// incurs significant execution cost.
515 bool isLoweredToCall(const Function *F) const;
516
517 struct LSRCost {
518 /// TODO: Some of these could be merged. Also, a lexical ordering
519 /// isn't always optimal.
520 unsigned Insns;
521 unsigned NumRegs;
522 unsigned AddRecCost;
523 unsigned NumIVMuls;
524 unsigned NumBaseAdds;
525 unsigned ImmCost;
526 unsigned SetupCost;
527 unsigned ScaleCost;
528 };
529
530 /// Parameters that control the generic loop unrolling transformation.
532 /// The cost threshold for the unrolled loop. Should be relative to the
533 /// getInstructionCost values returned by this API, and the expectation is
534 /// that the unrolled loop's instructions when run through that interface
535 /// should not exceed this cost. However, this is only an estimate. Also,
536 /// specific loops may be unrolled even with a cost above this threshold if
537 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
538 /// restriction.
539 unsigned Threshold;
540 /// If complete unrolling will reduce the cost of the loop, we will boost
541 /// the Threshold by a certain percent to allow more aggressive complete
542 /// unrolling. This value provides the maximum boost percentage that we
543 /// can apply to Threshold (The value should be no less than 100).
544 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
545 /// MaxPercentThresholdBoost / 100)
546 /// E.g. if complete unrolling reduces the loop execution time by 50%
547 /// then we boost the threshold by the factor of 2x. If unrolling is not
548 /// expected to reduce the running time, then we do not increase the
549 /// threshold.
551 /// The cost threshold for the unrolled loop when optimizing for size (set
552 /// to UINT_MAX to disable).
554 /// The cost threshold for the unrolled loop, like Threshold, but used
555 /// for partial/runtime unrolling (set to UINT_MAX to disable).
557 /// The cost threshold for the unrolled loop when optimizing for size, like
558 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
559 /// UINT_MAX to disable).
561 /// A forced unrolling factor (the number of concatenated bodies of the
562 /// original loop in the unrolled loop body). When set to 0, the unrolling
563 /// transformation will select an unrolling factor based on the current cost
564 /// threshold and other factors.
565 unsigned Count;
566 /// Default unroll count for loops with run-time trip count.
568 // Set the maximum unrolling factor. The unrolling factor may be selected
569 // using the appropriate cost threshold, but may not exceed this number
570 // (set to UINT_MAX to disable). This does not apply in cases where the
571 // loop is being fully unrolled.
572 unsigned MaxCount;
573 /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
574 /// to be overridden by a target gives more flexibility in certain cases.
575 /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
577 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
578 /// applies even if full unrolling is selected. This allows a target to fall
579 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
581 // Represents number of instructions optimized when "back edge"
582 // becomes "fall through" in unrolled loop.
583 // For now we count a conditional branch on a backedge and a comparison
584 // feeding it.
585 unsigned BEInsns;
586 /// Allow partial unrolling (unrolling of loops to expand the size of the
587 /// loop body, not only to eliminate small constant-trip-count loops).
589 /// Allow runtime unrolling (unrolling of loops to expand the size of the
590 /// loop body even when the number of loop iterations is not known at
591 /// compile time).
593 /// Allow generation of a loop remainder (extra iterations after unroll).
595 /// Allow emitting expensive instructions (such as divisions) when computing
596 /// the trip count of a loop for runtime unrolling.
598 /// Apply loop unroll on any kind of loop
599 /// (mainly to loops that fail runtime unrolling).
600 bool Force;
601 /// Allow using trip count upper bound to unroll loops.
603 /// Allow unrolling of all the iterations of the runtime loop remainder.
605 /// Allow unroll and jam. Used to enable unroll and jam for the target.
607 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
608 /// value above is used during unroll and jam for the outer loop size.
609 /// This value is used in the same manner to limit the size of the inner
610 /// loop.
612 /// Don't allow loop unrolling to simulate more than this number of
613 /// iterations when checking full unroll profitability
615 /// Don't disable runtime unroll for the loops which were vectorized.
617 };
618
619 /// Get target-customized preferences for the generic loop unrolling
620 /// transformation. The caller will initialize UP with the current
621 /// target-independent defaults.
622 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
623 UnrollingPreferences &UP,
624 OptimizationRemarkEmitter *ORE) const;
625
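// Usage sketch (illustrative only, not part of this header): clients first
// fill UP with generic defaults, then let the target adjust them. The values
// below are made up; real clients (see LoopUnrollPass) initialize every
// field of UP before calling.
static void queryUnrolling(Loop *L, ScalarEvolution &SE,
                           const TargetTransformInfo &TTI,
                           OptimizationRemarkEmitter *ORE) {
  TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150; // caller-provided generic default (made up here)
  UP.Count = 0;       // 0 = let the cost model pick the unroll factor
  TTI.getUnrollingPreferences(L, SE, UP, ORE);
  // The caller would now honor UP.Threshold, UP.Count, UP.MaxCount, ...
}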
626 /// Query the target whether it would be profitable to convert the given loop
627 /// into a hardware loop.
630 HardwareLoopInfo &HWLoopInfo) const;
631
632 /// Query the target whether it would be preferred to create a predicated
633 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
635
636 /// Query the target what the preferred style of tail folding is.
637 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
638 /// may (or will never) overflow for the suggested VF/UF in the given loop.
639 /// Targets can use this information to select a more optimal tail folding
640 /// style. The value conservatively defaults to true, such that no assumptions
641 /// are made on overflow.
643 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
644
645 // Parameters that control the loop peeling transformation
647 /// A forced peeling factor (the number of bodies of the original loop
648 /// that should be peeled off before the loop body). When set to 0, a
649 /// peeling factor is selected based on profile information and other factors.
650 unsigned PeelCount;
651 /// Allow peeling off loop iterations.
653 /// Allow peeling off loop iterations for loop nests.
655 /// Allow peeling based on profile. Used to enable peeling off all
656 /// iterations based on the provided profile.
657 /// If the value is true the peeling cost model can decide to peel only
658 /// some iterations and in this case it will set this to false.
660 };
661
662 /// Get target-customized preferences for the generic loop peeling
663 /// transformation. The caller will initialize \p PP with the current
664 /// target-independent defaults with information from \p L and \p SE.
666 PeelingPreferences &PP) const;
667
668 /// Targets can implement their own combinations for target-specific
669 /// intrinsics. This function will be called from the InstCombine pass every
670 /// time a target-specific intrinsic is encountered.
671 ///
672 /// \returns std::nullopt to not do anything target specific or a value that
673 /// will be returned from the InstCombiner. It is possible to stop further
674 /// processing of the intrinsic by returning nullptr.
675 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
676 IntrinsicInst & II) const;
677 /// Can be used to implement target-specific instruction combining.
678 /// \see instCombineIntrinsic
679 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
680 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
681 KnownBits & Known, bool &KnownBitsComputed) const;
682 /// Can be used to implement target-specific instruction combining.
683 /// \see instCombineIntrinsic
684 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
685 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
686 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
687 std::function<void(Instruction *, unsigned, APInt, APInt &)>
688 SimplifyAndSetOp) const;
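// Target-side sketch (illustrative only; MyTargetTTIImpl and the particular
// fold are hypothetical): a target's instCombineIntrinsic override typically
// pattern-matches the intrinsic, rewrites it via InstCombiner helpers, and
// returns std::nullopt when no target-specific combine applies.
std::optional<Instruction *>
MyTargetTTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
  // Hypothetical fold: the intrinsic is assumed to return its first operand
  // unchanged when that operand is a zero constant.
  if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0)))
    if (C->isNullValue())
      return IC.replaceInstUsesWith(II, C);
  return std::nullopt;
}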
689 /// @}
690
691 /// \name Scalar Target Information
692 /// @{
693
694 /// Flags indicating the kind of support for population count.
695 ///
696 /// Compared to the SW implementation, HW support is supposed to
697 /// significantly boost the performance when the population is dense, and it
698 /// may or may not degrade performance if the population is sparse. A HW
699 /// support is considered as "Fast" if it can outperform, or is on a par
700 /// with, SW implementation when the population is sparse; otherwise, it is
701 /// considered as "Slow".
703
704 /// Return true if the specified immediate is a legal add immediate, that
705 /// is, the target has add instructions which can add a register with the
706 /// immediate without having to materialize the immediate into a register.
707 bool isLegalAddImmediate(int64_t Imm) const;
708
709 /// Return true if adding the specified scalable immediate is legal, that is
710 /// the target has add instructions which can add a register with the
711 /// immediate (multiplied by vscale) without having to materialize the
712 /// immediate into a register.
713 bool isLegalAddScalableImmediate(int64_t Imm) const;
714
715 /// Return true if the specified immediate is a legal icmp immediate,
716 /// that is, the target has icmp instructions which can compare a register
717 /// against the immediate without having to materialize the immediate into a
718 /// register.
719 bool isLegalICmpImmediate(int64_t Imm) const;
720
721 /// Return true if the addressing mode represented by AM is legal for
722 /// this target, for a load/store of the specified type.
723 /// The type may be VoidTy, in which case only return true if the addressing
724 /// mode is legal for a load/store of any legal type.
725 /// If target returns true in LSRWithInstrQueries(), I may be valid.
726 /// \param ScalableOffset represents a quantity of bytes multiplied by vscale,
727 /// an invariant value known only at runtime. Most targets should not accept
728 /// a scalable offset.
729 ///
730 /// TODO: Handle pre/postinc as well.
731 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
732 bool HasBaseReg, int64_t Scale,
733 unsigned AddrSpace = 0, Instruction *I = nullptr,
734 int64_t ScalableOffset = 0) const;
735
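// Usage sketch (illustrative only, not part of this header): the kind of
// query LSR makes -- is "[BaseReg + 4 * IndexReg]" legal for an i32 access?
static bool scaledAddressingIsLegal(const TargetTransformInfo &TTI,
                                    LLVMContext &Ctx) {
  return TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx), /*BaseGV=*/nullptr,
                                   /*BaseOffset=*/0, /*HasBaseReg=*/true,
                                   /*Scale=*/4);
}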
736 /// Return true if LSR cost of C1 is lower than C2.
738 const TargetTransformInfo::LSRCost &C2) const;
739
740 /// Return true if LSR's major cost is the number of registers. Targets which
741 /// implement their own isLSRCostLess and unset number of registers as major
742 /// cost should return false, otherwise return true.
743 bool isNumRegsMajorCostOfLSR() const;
744
745 /// Return true if LSR should attempt to replace a use of an otherwise dead
746 /// primary IV in the latch condition with another IV available in the loop.
747 /// When successful, makes the primary IV dead.
749
750 /// Return true if LSR should drop a found solution if it's calculated to be
751 /// less profitable than the baseline.
753
754 /// \returns true if LSR should not optimize a chain that includes \p I.
756
757 /// Return true if the target can fuse a compare and branch.
758 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
759 /// calculation for the instructions in a loop.
760 bool canMacroFuseCmp() const;
761
762 /// Return true if the target can save a compare for loop count; for example,
763 /// a hardware loop saves a compare.
764 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
766 TargetLibraryInfo *LibInfo) const;
767
772 };
773
774 /// Return the preferred addressing mode LSR should make efforts to generate.
776 ScalarEvolution *SE) const;
777
778 /// Return true if the target supports masked store.
779 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
780 /// Return true if the target supports masked load.
781 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
782
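// Usage sketch (illustrative only, not part of this header): a vectorizer-
// style legality check before emitting a masked load of <4 x float> with
// 16-byte alignment.
static bool canUseMaskedLoad(const TargetTransformInfo &TTI,
                             LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  return TTI.isLegalMaskedLoad(VecTy, Align(16));
}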
783 /// Return true if the target supports nontemporal store.
784 bool isLegalNTStore(Type *DataType, Align Alignment) const;
785 /// Return true if the target supports nontemporal load.
786 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
787
788 /// \Returns true if the target supports broadcasting a load to a vector of
789 /// type <NumElements x ElementTy>.
790 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
791
792 /// Return true if the target supports masked scatter.
793 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
794 /// Return true if the target supports masked gather.
795 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
796 /// Return true if the target forces scalarizing of llvm.masked.gather
797 /// intrinsics.
798 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
799 /// Return true if the target forces scalarizing of llvm.masked.scatter
800 /// intrinsics.
801 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
802
803 /// Return true if the target supports masked compress store.
804 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const;
805 /// Return true if the target supports masked expand load.
806 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
807
808 /// Return true if the target supports strided load.
809 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
810
811 // Return true if the target supports masked vector histograms.
812 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const;
813
814 /// Return true if this is an alternating opcode pattern that can be lowered
815 /// to a single instruction on the target. In X86 this is for the addsub
816 /// instruction, which corresponds to a Shuffle + Fadd + FSub pattern in IR.
817 /// This function expects two opcodes, \p Opcode0 and \p Opcode1, selected
818 /// by \p OpcodeMask. The mask contains one bit per lane and is a `0`
819 /// when \p Opcode0 is selected and a `1` when \p Opcode1 is selected.
820 /// \p VecTy is the vector type of the instruction to be generated.
821 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
822 const SmallBitVector &OpcodeMask) const;
823
824 /// Return true if we should be enabling ordered reductions for the target.
825 bool enableOrderedReductions() const;
826
827 /// Return true if the target has a unified operation to calculate division
828 /// and remainder. If so, the additional implicit multiplication and
829 /// subtraction required to calculate a remainder from division are free. This
830 /// can enable more aggressive transformations for division and remainder than
831 /// would typically be allowed using throughput or size cost models.
832 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
833
834 /// Return true if the given instruction (assumed to be a memory access
835 /// instruction) has a volatile variant. If that's the case then we can avoid
836 /// addrspacecast to generic AS for volatile loads/stores. Default
837 /// implementation returns false, which prevents address space inference for
838 /// volatile loads/stores.
839 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
840
841 /// Return true if target doesn't mind addresses in vectors.
842 bool prefersVectorizedAddressing() const;
843
844 /// Return the cost of the scaling factor used in the addressing
845 /// mode represented by AM for this target, for a load/store
846 /// of the specified type.
847 /// If the AM is supported, the return value must be >= 0.
848 /// If the AM is not supported, it returns a negative value.
849 /// TODO: Handle pre/postinc as well.
851 StackOffset BaseOffset, bool HasBaseReg,
852 int64_t Scale,
853 unsigned AddrSpace = 0) const;
854
855 /// Return true if the loop strength reduce pass should make
856 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
857 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
858 /// immediate offset and no index register.
859 bool LSRWithInstrQueries() const;
860
861 /// Return true if it's free to truncate a value of type Ty1 to type
862 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
863 /// by referencing its sub-register AX.
864 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
865
866 /// Return true if it is profitable to hoist instruction in the
867 /// then/else to before if.
868 bool isProfitableToHoist(Instruction *I) const;
869
870 bool useAA() const;
871
872 /// Return true if this type is legal.
873 bool isTypeLegal(Type *Ty) const;
874
875 /// Returns the estimated number of registers required to represent \p Ty.
876 unsigned getRegUsageForType(Type *Ty) const;
877
878 /// Return true if switches should be turned into lookup tables for the
879 /// target.
880 bool shouldBuildLookupTables() const;
881
882 /// Return true if switches should be turned into lookup tables
883 /// containing this constant value for the target.
885
886 /// Return true if lookup tables should be turned into relative lookup tables.
887 bool shouldBuildRelLookupTables() const;
888
889 /// Return true if the input function, which is cold at all call sites,
890 /// should use the coldcc calling convention.
891 bool useColdCCForColdCall(Function &F) const;
892
893 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
894 /// are set if the demanded result elements need to be inserted and/or
895 /// extracted from vectors.
897 const APInt &DemandedElts,
898 bool Insert, bool Extract,
900
901 /// Estimate the overhead of scalarizing an instruction's unique
902 /// non-constant operands. The (potentially vector) types to use for each
903 /// argument are passed via Tys.
908
909 /// If target has efficient vector element load/store instructions, it can
910 /// return true here so that insertion/extraction costs are not added to
911 /// the scalarization cost of a load/store.
913
914 /// If the target supports tail calls.
915 bool supportsTailCalls() const;
916
917 /// If the target supports a tail call on \p CB
918 bool supportsTailCallFor(const CallBase *CB) const;
919
920 /// Don't restrict interleaved unrolling to small loops.
921 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
922
923 /// Returns options for expansion of memcmp. IsZeroCmp is
924 // true if this is the expansion of memcmp(p1, p2, s) == 0.
926 // Return true if memcmp expansion is enabled.
927 operator bool() const { return MaxNumLoads > 0; }
928
929 // Maximum number of load operations.
930 unsigned MaxNumLoads = 0;
931
932 // The list of available load sizes (in bytes), sorted in decreasing order.
934
935 // For memcmp expansion when the memcmp result is only compared equal or
936 // not-equal to 0, allow up to this number of load pairs per block. As an
937 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
938 // a0 = load2bytes &a[0]
939 // b0 = load2bytes &b[0]
940 // a2 = load1byte &a[2]
941 // b2 = load1byte &b[2]
942 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
943 unsigned NumLoadsPerBlock = 1;
944
945 // Set to true to allow overlapping loads. For example, 7-byte compares can
946 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
947 // requires all loads in LoadSizes to be doable in an unaligned way.
949
950 // Sometimes, the amount of data that needs to be compared is smaller than
951 // the standard register size, but it cannot be loaded with just one load
952 // instruction. For example, if the size of the memory comparison is 6
953 // bytes, we can handle it more efficiently by loading all 6 bytes in a
954 // single block and generating an 8-byte number, instead of generating two
955 // separate blocks with conditional jumps for 4 and 2 byte loads. This
956 // approach simplifies the process and produces the comparison result as
957 // normal. This array lists the allowed sizes of memcmp tails that can be
958 // merged into one block
960 };
961 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
962 bool IsZeroCmp) const;
963
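// Target-side sketch (illustrative only; the numbers are made up, and
// LoadSizes/AllowOverlappingLoads are fields of the full struct not shown in
// this extract): what a 64-bit target might return from enableMemCmpExpansion.
TargetTransformInfo::MemCmpExpansionOptions
exampleMemCmpOptions(bool OptSize, bool IsZeroCmp) {
  TargetTransformInfo::MemCmpExpansionOptions Options;
  Options.MaxNumLoads = OptSize ? 2 : 4; // total loads allowed per expansion
  Options.LoadSizes = {8, 4, 2, 1};      // available load sizes, decreasing
  Options.NumLoadsPerBlock = 2;          // load pairs per block for the == 0 case
  Options.AllowOverlappingLoads = IsZeroCmp;
  return Options;
}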
964 /// Should the Select Optimization pass be enabled and run.
965 bool enableSelectOptimize() const;
966
967 /// Should the Select Optimization pass treat the given instruction like a
968 /// select, potentially converting it to a conditional branch. This can
969 /// include select-like instructions like or(zext(c), x) that can be converted
970 /// to selects.
972
973 /// Enable matching of interleaved access groups.
975
976 /// Enable matching of interleaved access groups that contain predicated
977 /// accesses or gaps and therefore vectorized using masked
978 /// vector loads/stores.
980
981 /// Indicate that it is potentially unsafe to automatically vectorize
982 /// floating-point operations because vector and scalar floating-point
983 /// semantics may differ. For example, ARM NEON v7 SIMD math
984 /// does not support IEEE-754 denormal numbers, while depending on the
985 /// platform, scalar floating-point math does.
986 /// This applies to floating-point math operations and calls, not memory
987 /// operations, shuffles, or casts.
989
990 /// Determine if the target supports unaligned memory accesses.
992 unsigned AddressSpace = 0,
993 Align Alignment = Align(1),
994 unsigned *Fast = nullptr) const;
995
996 /// Return hardware support for population count.
997 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
998
999 /// Return true if the hardware has a fast square-root instruction.
1000 bool haveFastSqrt(Type *Ty) const;
1001
1002 /// Return true if the cost of the instruction is too high to speculatively
1003 /// execute and should be kept behind a branch.
1004 /// This normally just wraps around a getInstructionCost() call, but some
1005 /// targets might report a low TCK_SizeAndLatency value that is incompatible
1006 /// with the fixed TCC_Expensive value.
1007 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
1009
1010 /// Return true if it is faster to check if a floating-point value is NaN
1011 /// (or not-NaN) versus a comparison against a constant FP zero value.
1012 /// Targets should override this if materializing a 0.0 for comparison is
1013 /// generally as cheap as checking for ordered/unordered.
1014 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
1015
1016 /// Return the expected cost of supporting the floating point operation
1017 /// of the specified type.
1018 InstructionCost getFPOpCost(Type *Ty) const;
1019
1020 /// Return the expected cost of materializing for the given integer
1021 /// immediate of the specified type.
1022 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1023 TargetCostKind CostKind) const;
1024
1025 /// Return the expected cost of materialization for the given integer
1026 /// immediate of the specified type for a given instruction. The cost can be
1027 /// zero if the immediate can be folded into the specified instruction.
1028 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1029 const APInt &Imm, Type *Ty,
1031 Instruction *Inst = nullptr) const;
1033 const APInt &Imm, Type *Ty,
1034 TargetCostKind CostKind) const;
1035
1036 /// Return the expected cost for the given integer when optimising
1037 /// for size. This is different than the other integer immediate cost
1038 /// functions in that it is subtarget agnostic. This is useful when you e.g.
1039 /// target one ISA such as AArch32 but smaller encodings could be possible
1040 /// with another such as Thumb. This return value is used as a penalty when
1041 /// the total costs for a constant is calculated (the bigger the cost, the
1042 /// more beneficial constant hoisting is).
1043 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1044 const APInt &Imm, Type *Ty) const;
1045
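// Usage sketch (illustrative only, not part of this header): the constant-
// hoisting style question -- is this immediate free when folded into an
// 'add' as its second operand, or is it worth hoisting into a register?
static bool immIsFreeInAdd(const TargetTransformInfo &TTI, const APInt &Imm,
                           Type *Ty) {
  InstructionCost Cost =
      TTI.getIntImmCostInst(Instruction::Add, /*Idx=*/1, Imm, Ty,
                            TargetTransformInfo::TCK_SizeAndLatency);
  return Cost == TargetTransformInfo::TCC_Free;
}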
1046 /// It can be advantageous to detach complex constants from their uses to make
1047 /// their generation cheaper. This hook allows targets to report when such
1048 /// transformations might negatively affect the code generation of the
1049 /// underlying operation. The motivating example is divisions, where hoisting
1050 /// constants prevents the code generator from transforming them into
1051 /// combinations of simpler operations.
1053 const Function &Fn) const;
1054
1055 /// @}
1056
1057 /// \name Vector Target Information
1058 /// @{
1059
1060 /// The various kinds of shuffle patterns for vector queries.
1062 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1063 SK_Reverse, ///< Reverse the order of the vector.
1064 SK_Select, ///< Selects elements from the corresponding lane of
1065 ///< either source operand. This is equivalent to a
1066 ///< vector select with a constant condition operand.
1067 SK_Transpose, ///< Transpose two vectors.
1068 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1069 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1070 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1071 ///< with any shuffle mask.
1072 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1073 ///< shuffle mask.
1074 SK_Splice ///< Concatenates elements from the first input vector
1075 ///< with elements of the second input vector. Returning
1076 ///< a vector of the same type as the input vectors.
1077 ///< Index indicates start offset in first input vector.
1079
1080 /// Additional information about an operand's possible values.
1081 enum OperandValueKind {
1082 OK_AnyValue, // Operand can have any value.
1083 OK_UniformValue, // Operand is uniform (splat of a value).
1084 OK_UniformConstantValue, // Operand is uniform constant.
1085 OK_NonUniformConstantValue // Operand is a non-uniform constant value.
1086 };
1087
1088 /// Additional properties of an operand's values.
1089 enum OperandValueProperties {
1090 OP_None = 0,
1091 OP_PowerOf2 = 1,
1092 OP_NegatedPowerOf2 = 2,
1093 };
1094
1095 // Describe the values an operand can take. We're in the process
1096 // of migrating uses of OperandValueKind and OperandValueProperties
1097 // to use this class, and then will change the internal representation.
1098 struct OperandValueInfo {
1099 OperandValueKind Kind = OK_AnyValue;
1100 OperandValueProperties Properties = OP_None;
1101
1102 bool isConstant() const {
1103 return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
1104 }
1105 bool isUniform() const {
1106 return Kind == OK_UniformValue || Kind == OK_UniformConstantValue;
1107 }
1108 bool isPowerOf2() const {
1109 return Properties == OP_PowerOf2;
1110 }
1111 bool isNegatedPowerOf2() const {
1112 return Properties == OP_NegatedPowerOf2;
1113 }
1114
1115 OperandValueInfo getNoProps() const {
1116 return {Kind, OP_None};
1117 }
1118 };
1119
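// Usage sketch (illustrative only, not part of this header): classify the
// operands of a binary operator before costing it; getOperandInfo is the
// static helper declared further down in this class.
static bool divisorIsUniformPowerOf2(const BinaryOperator *BO) {
  TargetTransformInfo::OperandValueInfo OpInfo =
      TargetTransformInfo::getOperandInfo(BO->getOperand(1));
  return OpInfo.isConstant() && OpInfo.isUniform() && OpInfo.isPowerOf2();
}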
1120 /// \return the number of registers in the target-provided register class.
1121 unsigned getNumberOfRegisters(unsigned ClassID) const;
1122
1123 /// \return true if the target supports load/store that enables fault
1124 /// suppression of memory operands when the source condition is false.
1125 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const;
1126
1127 /// \return the target-provided register class ID for the provided type,
1128 /// accounting for type promotion and other type-legalization techniques that
1129 /// the target might apply. However, it specifically does not account for the
1130 /// scalarization or splitting of vector types. Should a vector type require
1131 /// scalarization or splitting into multiple underlying vector registers, that
1132 /// type should be mapped to a register class containing no registers.
1133 /// Specifically, this is designed to provide a simple, high-level view of the
1134 /// register allocation later performed by the backend. These register classes
1135 /// don't necessarily map onto the register classes used by the backend.
1136 /// FIXME: It's not currently possible to determine how many registers
1137 /// are used by the provided type.
1138 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1139
1140 /// \return the target-provided register class name
1141 const char *getRegisterClassName(unsigned ClassID) const;
1142
1144
1145 /// \return The width of the largest scalar or vector register type.
1147
1148 /// \return The width of the smallest vector register type.
1149 unsigned getMinVectorRegisterBitWidth() const;
1150
1151 /// \return The maximum value of vscale if the target specifies an
1152 /// architectural maximum vector length, and std::nullopt otherwise.
1153 std::optional<unsigned> getMaxVScale() const;
1154
1155 /// \return the value of vscale to tune the cost model for.
1156 std::optional<unsigned> getVScaleForTuning() const;
1157
1158 /// \return true if vscale is known to be a power of 2
1159 bool isVScaleKnownToBeAPowerOfTwo() const;
1160
1161 /// \return True if the vectorization factor should be chosen to
1162 /// make the vector of the smallest element type match the size of a
1163 /// vector register. For wider element types, this could result in
1164 /// creating vectors that span multiple vector registers.
1165 /// If false, the vectorization factor will be chosen based on the
1166 /// size of the widest element type.
1167 /// \p K Register Kind for vectorization.
1169
1170 /// \return The minimum vectorization factor for types of given element
1171 /// bit width, or 0 if there is no minimum VF. The returned value only
1172 /// applies when shouldMaximizeVectorBandwidth returns true.
1173 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1174 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1175
1176 /// \return The maximum vectorization factor for types of given element
1177 /// bit width and opcode, or 0 if there is no maximum VF.
1178 /// Currently only used by the SLP vectorizer.
1179 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1180
1181 /// \return The minimum vectorization factor for the store instruction. Given
1182 /// the initial estimation of the minimum vector factor and store value type,
1183 /// it tries to find the lowest possible VF, which still might be profitable for
1184 /// the vectorization.
1185 /// \param VF Initial estimation of the minimum vector factor.
1186 /// \param ScalarMemTy Scalar memory type of the store operation.
1187 /// \param ScalarValTy Scalar type of the stored value.
1188 /// Currently only used by the SLP vectorizer.
1189 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1190 Type *ScalarValTy) const;
1191
1192 /// \return True if it should be considered for address type promotion.
1193 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1194 /// profitable without finding other extensions fed by the same input.
1196 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1197
1198 /// \return The size of a cache line in bytes.
1199 unsigned getCacheLineSize() const;
1200
1201 /// The possible cache levels
1202 enum class CacheLevel {
1203 L1D, // The L1 data cache
1204 L2D, // The L2 data cache
1205
1206 // We currently do not model L3 caches, as their sizes differ widely between
1207 // microarchitectures. Also, we currently do not have a use for L3 cache
1208 // size modeling yet.
1209 };
1210
1211 /// \return The size of the cache level in bytes, if available.
1212 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1213
1214 /// \return The associativity of the cache level, if available.
1215 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1216
1217 /// \return The minimum architectural page size for the target.
1218 std::optional<unsigned> getMinPageSize() const;
1219
1220 /// \return How much before a load we should place the prefetch
1221 /// instruction. This is currently measured in number of
1222 /// instructions.
1223 unsigned getPrefetchDistance() const;
1224
1225 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1226 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1227 /// and the arguments provided are meant to serve as a basis for deciding this
1228 /// for a particular loop.
1229 ///
1230 /// \param NumMemAccesses Number of memory accesses in the loop.
1231 /// \param NumStridedMemAccesses Number of the memory accesses that
1232 /// ScalarEvolution could find a known stride
1233 /// for.
1234 /// \param NumPrefetches Number of software prefetches that will be
1235 /// emitted as determined by the addresses
1236 /// involved and the cache line size.
1237 /// \param HasCall True if the loop contains a call.
1238 ///
1239 /// \return This is the minimum stride in bytes where it makes sense to start
1240 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1241 /// stride.
1242 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1243 unsigned NumStridedMemAccesses,
1244 unsigned NumPrefetches, bool HasCall) const;
1245
1246 /// \return The maximum number of iterations to prefetch ahead. If
1247 /// the required number of iterations is more than this number, no
1248 /// prefetching is performed.
1249 unsigned getMaxPrefetchIterationsAhead() const;
1250
1251 /// \return True if prefetching should also be done for writes.
1252 bool enableWritePrefetching() const;
1253
1254 /// \return true if the target wants to issue a prefetch in address space \p AS.
1255 bool shouldPrefetchAddressSpace(unsigned AS) const;
1256
1257 /// \return The maximum interleave factor that any transform should try to
1258 /// perform for this target. This number depends on the level of parallelism
1259 /// and the number of execution units in the CPU.
1260 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1261
1262 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1263 static OperandValueInfo getOperandInfo(const Value *V);
1264
1265 /// This is an approximation of reciprocal throughput of a math/logic op.
1266 /// A higher cost indicates less expected throughput.
1267 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1268 /// clock cycles per instruction when the instructions are not part of a
1269 /// limiting dependency chain."
1270 /// Therefore, costs should be scaled to account for multiple execution units
1271 /// on the target that can process this type of instruction. For example, if
1272 /// there are 5 scalar integer units and 2 vector integer units that can
1273 /// calculate an 'add' in a single cycle, this model should indicate that the
1274 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1275 /// add instruction.
1276 /// \p Args is an optional argument which holds the instruction operands
1277 /// values so the TTI can analyze those values searching for special
1278 /// cases or optimizations based on those values.
1279 /// \p CxtI is the optional original context instruction, if one exists, to
1280 /// provide even more information.
1281 /// \p TLibInfo is used to search for platform specific vector library
1282 /// functions for instructions that might be converted to calls (e.g. frem).
1283 InstructionCost getArithmeticInstrCost(
1284 unsigned Opcode, Type *Ty,
1285 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1286 TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
1287 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1288 ArrayRef<const Value *> Args = std::nullopt,
1289 const Instruction *CxtI = nullptr,
1290 const TargetLibraryInfo *TLibInfo = nullptr) const;
1291
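// Usage sketch (illustrative only, not part of this header): reciprocal-
// throughput cost of a <4 x i32> add, and of a udiv whose divisor is known
// to be a uniform power-of-two constant.
static void exampleArithmeticCosts(const TargetTransformInfo &TTI,
                                   LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  InstructionCost AddCost = TTI.getArithmeticInstrCost(
      Instruction::Add, VecTy, TargetTransformInfo::TCK_RecipThroughput);
  InstructionCost DivCost = TTI.getArithmeticInstrCost(
      Instruction::UDiv, VecTy, TargetTransformInfo::TCK_RecipThroughput,
      {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
      {TargetTransformInfo::OK_UniformConstantValue,
       TargetTransformInfo::OP_PowerOf2});
  (void)AddCost;
  (void)DivCost;
}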
1292 /// Returns the cost estimation for alternating opcode pattern that can be
1293 /// lowered to a single instruction on the target. In X86 this is for the
1294 /// addsub instruction, which corresponds to a Shuffle + Fadd + FSub pattern
1295 /// in IR. This function expects two opcodes, \p Opcode0 and \p Opcode1,
1296 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1297 /// when \p Opcode0 is selected and a `1` when \p Opcode1 is selected.
1298 /// \p VecTy is the vector type of the instruction to be generated.
1299 InstructionCost getAltInstrCost(
1300 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1301 const SmallBitVector &OpcodeMask,
1303
1304 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1305 /// The exact mask may be passed as Mask, or else the array will be empty.
1306 /// The index and subtype parameters are used by the subvector insertion and
1307 /// extraction shuffle kinds to show the insert/extract point and the type of
1308 /// the subvector being inserted/extracted. The operands of the shuffle can be
1309 /// passed through \p Args, which helps improve the cost estimation in some
1310 /// cases, like in broadcast loads.
1311 /// NOTE: For subvector extractions Tp represents the source type.
1312 InstructionCost getShuffleCost(
1313 ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
1315 VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
1316 const Instruction *CxtI = nullptr) const;
1317
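// Usage sketch (illustrative only, not part of this header): cost of
// reversing a <8 x i16> vector; the defaulted arguments cover the mask,
// cost kind, index and subvector type.
static InstructionCost reverseShuffleCost(const TargetTransformInfo &TTI,
                                          LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt16Ty(Ctx), 8);
  return TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VecTy);
}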
1318 /// Represents a hint about the context in which a cast is used.
1319 ///
1320 /// For zext/sext, the context of the cast is the operand, which must be a
1321 /// load of some kind. For trunc, the context of the cast is the single
1322 /// user of the instruction, which must be a store of some kind.
1323 ///
1324 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1325 /// type of cast it's dealing with, as not every cast is equal. For instance,
1326 /// the zext of a load may be free, but the zext of an interleaved load can
1327 /// be (very) expensive!
1328 ///
1329 /// See \c getCastContextHint to compute a CastContextHint from a cast
1330 /// Instruction*. Callers can use it if they don't need to override the
1331 /// context and just want it to be calculated from the instruction.
1332 ///
1333 /// FIXME: This handles the types of load/store that the vectorizer can
1334 /// produce, which are the cases where the context instruction is most
1335 /// likely to be incorrect. There are other situations where that can happen
1336 /// too, which might be handled here but in the long run a more general
1337 /// solution of costing multiple instructions at the same time may be better.
1338 enum class CastContextHint : uint8_t {
1339 None, ///< The cast is not used with a load/store of any kind.
1340 Normal, ///< The cast is used with a normal load/store.
1341 Masked, ///< The cast is used with a masked load/store.
1342 GatherScatter, ///< The cast is used with a gather/scatter.
1343 Interleave, ///< The cast is used with an interleaved load/store.
1344 Reversed, ///< The cast is used with a reversed load/store.
1345 };
1346
1347 /// Calculates a CastContextHint from \p I.
1348 /// This should be used by callers of getCastInstrCost if they wish to
1349 /// determine the context from some instruction.
1350 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1351 /// or if it's another type of cast.
1352 static CastContextHint getCastContextHint(const Instruction *I);
1353
1354 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1355 /// zext, etc. If there is an existing instruction that holds Opcode, it
1356 /// may be passed in the 'I' parameter.
1357 InstructionCost
1358 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1359 TTI::CastContextHint CCH,
1360 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1361 const Instruction *I = nullptr) const;
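// Illustrative usage sketch (assumes TargetTransformInfo &TTI and an existing
// Instruction *Zext that zero-extends a loaded value):
//
//   TargetTransformInfo::CastContextHint CCH =
//       TargetTransformInfo::getCastContextHint(Zext);
//   InstructionCost ZExtCost = TTI.getCastInstrCost(
//       Instruction::ZExt, Zext->getType(), Zext->getOperand(0)->getType(),
//       CCH, TargetTransformInfo::TCK_RecipThroughput, Zext);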
1362
1363 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1364 /// Index = -1 to indicate that there is no information about the index value.
1365 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1366 VectorType *VecTy,
1367 unsigned Index) const;
1368
1369 /// \return The expected cost of control-flow related instructions such as
1370 /// Phi, Ret, Br, Switch.
1371 InstructionCost
1372 getCFInstrCost(unsigned Opcode,
1373 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1374 const Instruction *I = nullptr) const;
1375
1376 /// \returns The expected cost of compare and select instructions. If there
1377 /// is an existing instruction that holds Opcode, it may be passed in the
1378 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1379 /// is using a compare with the specified predicate as condition. When vector
1380 /// types are passed, \p VecPred must be used for all lanes.
1381 InstructionCost
1382 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1383 CmpInst::Predicate VecPred,
1384 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1385 const Instruction *I = nullptr) const;
1386
1387 /// \return The expected cost of vector Insert and Extract.
1388 /// Use -1 to indicate that there is no information on the index value.
1389 /// This is used when the instruction is not available; a typical use
1390 /// case is to provision the cost of vectorization/scalarization in
1391 /// vectorizer passes.
1392 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1393 TTI::TargetCostKind CostKind,
1394 unsigned Index = -1, Value *Op0 = nullptr,
1395 Value *Op1 = nullptr) const;
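// Illustrative usage sketch (assumes TargetTransformInfo &TTI and
// LLVMContext &Ctx):
//
//   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
//   InstructionCost ExtractCost = TTI.getVectorInstrCost(
//       Instruction::ExtractElement, VecTy,
//       TargetTransformInfo::TCK_RecipThroughput, /*Index=*/2);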
1396
1397 /// \return The expected cost of vector Insert and Extract.
1398 /// This is used when the instruction is available, and the implementation
1399 /// asserts that 'I' is not nullptr.
1400 ///
1401 /// A typical use case is cost estimation when the vector instruction
1402 /// exists (e.g., for instructions taken from basic blocks during transformation).
1403 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1404 TTI::TargetCostKind CostKind,
1405 unsigned Index = -1) const;
1406
1407 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1408 /// \p ReplicationFactor times.
1409 ///
1410 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1411 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1412 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1413 int VF,
1414 const APInt &DemandedDstElts,
1415 TTI::TargetCostKind CostKind) const;
1416
1417 /// \return The cost of Load and Store instructions.
1418 InstructionCost
1419 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1420 unsigned AddressSpace,
1421 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1422 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1423 const Instruction *I = nullptr) const;
1424
1425 /// \return The cost of VP Load and Store instructions.
1426 InstructionCost
1427 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1428 unsigned AddressSpace,
1429 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1430 const Instruction *I = nullptr) const;
1431
1432 /// \return The cost of masked Load and Store instructions.
1433 InstructionCost getMaskedMemoryOpCost(
1434 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1435 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1436
1437 /// \return The cost of Gather or Scatter operation
1438 /// \p Opcode - is a type of memory access Load or Store
1439 /// \p DataTy - a vector type of the data to be loaded or stored
1440 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1441 /// \p VariableMask - true when the memory access is predicated with a mask
1442 /// that is not a compile-time constant
1443 /// \p Alignment - alignment of single element
1444 /// \p I - the optional original context instruction, if one exists, e.g. the
1445 /// load/store to transform or the call to the gather/scatter intrinsic
1446 InstructionCost getGatherScatterOpCost(
1447 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1448 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1449 const Instruction *I = nullptr) const;
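// Illustrative usage sketch (assumes TargetTransformInfo &TTI, LLVMContext
// &Ctx, and a Value *PtrVec holding a <4 x ptr> vector of addresses):
//
//   auto *DataTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
//   InstructionCost GatherCost = TTI.getGatherScatterOpCost(
//       Instruction::Load, DataTy, PtrVec, /*VariableMask=*/true, Align(4),
//       TargetTransformInfo::TCK_RecipThroughput);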
1450
1451 /// \return The cost of strided memory operations.
1452 /// \p Opcode - is a type of memory access Load or Store
1453 /// \p DataTy - a vector type of the data to be loaded or stored
1454 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1455 /// \p VariableMask - true when the memory access is predicated with a mask
1456 /// that is not a compile-time constant
1457 /// \p Alignment - alignment of single element
1458 /// \p I - the optional original context instruction, if one exists, e.g. the
1459 /// load/store to transform or the call to the gather/scatter intrinsic
1460 InstructionCost getStridedMemoryOpCost(
1461 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1462 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1463 const Instruction *I = nullptr) const;
1464
1465 /// \return The cost of the interleaved memory operation.
1466 /// \p Opcode is the memory operation code
1467 /// \p VecTy is the vector type of the interleaved access.
1468 /// \p Factor is the interleave factor
1469 /// \p Indices is the indices for interleaved load members (as interleaved
1470 /// load allows gaps)
1471 /// \p Alignment is the alignment of the memory operation
1472 /// \p AddressSpace is address space of the pointer.
1473 /// \p UseMaskForCond indicates if the memory access is predicated.
1474 /// \p UseMaskForGaps indicates if gaps should be masked.
1475 InstructionCost getInterleavedMemoryOpCost(
1476 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1477 Align Alignment, unsigned AddressSpace,
1478 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1479 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
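// Illustrative usage sketch (assumes TargetTransformInfo &TTI and LLVMContext
// &Ctx): costing a factor-2 interleaved load where both members are used.
//
//   auto *WideTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);
//   unsigned Indices[] = {0, 1};
//   InstructionCost ILCost = TTI.getInterleavedMemoryOpCost(
//       Instruction::Load, WideTy, /*Factor=*/2, Indices, Align(16),
//       /*AddressSpace=*/0, TargetTransformInfo::TCK_RecipThroughput);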
1480
1481 /// A helper function to determine the type of reduction algorithm used
1482 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1483 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1484 return FMF && !(*FMF).allowReassoc();
1485 }
1486
1487 /// Calculate the cost of vector reduction intrinsics.
1488 ///
1489 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1490 /// value using the operation denoted by \p Opcode. The FastMathFlags
1491 /// parameter \p FMF indicates what type of reduction we are performing:
1492 /// 1. Tree-wise. This is the typical 'fast' reduction, performed by
1493 /// successively splitting a vector in half and doing the
1494 /// operation on the pair of halves until you have a scalar value. For
1495 /// example:
1496 /// (v0, v1, v2, v3)
1497 /// ((v0+v2), (v1+v3), undef, undef)
1498 /// ((v0+v2+v1+v3), undef, undef, undef)
1499 /// This is the default behaviour for integer operations, whereas for
1500 /// floating point we only do this if \p FMF indicates that
1501 /// reassociation is allowed.
1502 /// 2. Ordered. For a vector with N elements this involves performing N
1503 /// operations in lane order, starting with an initial scalar value, i.e.
1504 /// result = InitVal + v0
1505 /// result = result + v1
1506 /// result = result + v2
1507 /// result = result + v3
1508 /// This is only the case for FP operations and when reassociation is not
1509 /// allowed.
1510 ///
1511 InstructionCost getArithmeticReductionCost(
1512 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1513 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1514
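// Illustrative usage sketch for getArithmeticReductionCost (assumes
// TargetTransformInfo &TTI and LLVMContext &Ctx): the reassociating query
// may be lowered tree-wise, the strict one must stay in lane order.
//
//   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
//   FastMathFlags Reassoc;
//   Reassoc.setAllowReassoc();
//   InstructionCost FastCost =
//       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, Reassoc);
//   InstructionCost OrderedCost =
//       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy,
//                                      FastMathFlags());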
1515 InstructionCost getMinMaxReductionCost(
1516 Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
1517 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1518
1519 /// Calculate the cost of an extended reduction pattern, similar to
1520 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1521 /// extensions. This is the cost of:
1522 /// ResTy vecreduce.add(mul(A, B)), or
1523 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1524 InstructionCost getMulAccReductionCost(
1525 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1526 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1527
1528 /// Calculate the cost of an extended reduction pattern, similar to
1529 /// getArithmeticReductionCost of a reduction with an extension.
1530 /// This is the cost of:
1531 /// ResTy vecreduce.opcode(ext(Ty A)).
1532 InstructionCost getExtendedReductionCost(
1533 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1534 FastMathFlags FMF,
1535 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1536
1537 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1538 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1539 /// 3. scalar instruction which is to be vectorized.
1540 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1541 TTI::TargetCostKind CostKind) const;
1542
1543 /// \returns The cost of Call instructions.
1544 InstructionCost getCallInstrCost(
1545 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1546 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1547
1548 /// \returns The number of pieces into which the provided type must be
1549 /// split during legalization. Zero is returned when the answer is unknown.
1550 unsigned getNumberOfParts(Type *Tp) const;
1551
1552 /// \returns The cost of the address computation. For most targets this can be
1553 /// merged into the instruction indexing mode. Some targets might want to
1554 /// distinguish between address computation for memory operations on vector
1555 /// types and scalar types. Such targets should override this function.
1556 /// The 'SE' parameter holds pointer for the scalar evolution object which
1557 /// The 'SE' parameter holds a pointer to the scalar evolution object which
1558 /// is used to get the step value of the Ptr in the case of a constant stride.
1559 InstructionCost getAddressComputationCost(Type *Ty,
1560 ScalarEvolution *SE = nullptr,
1561 const SCEV *Ptr = nullptr) const;
1562
1563 /// \returns The cost, if any, of keeping values of the given types alive
1564 /// over a callsite.
1565 ///
1566 /// Some types may require the use of register classes that do not have
1567 /// any callee-saved registers, so would require a spill and fill.
1568 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1569
1570 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1571 /// will contain additional information - whether the intrinsic may write
1572 /// or read memory, its volatility, and the pointer. Info is undefined
1573 /// if false is returned.
1574 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1575
1576 /// \returns The maximum element size, in bytes, for an element
1577 /// unordered-atomic memory intrinsic.
1578 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1579
1580 /// \returns A value which is the result of the given memory intrinsic. New
1581 /// instructions may be created to extract the result from the given intrinsic
1582 /// memory operation. Returns nullptr if the target cannot create a result
1583 /// from the given intrinsic.
1584 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1585 Type *ExpectedType) const;
1586
1587 /// \returns The type to use in a loop expansion of a memcpy call.
1588 Type *getMemcpyLoopLoweringType(
1589 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1590 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1591 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1592
1593 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1594 /// \param RemainingBytes The number of bytes to copy.
1595 ///
1596 /// Calculates the operand types to use when copying \p RemainingBytes of
1597 /// memory, where source and destination alignments are \p SrcAlign and
1598 /// \p DestAlign respectively.
1599 void getMemcpyLoopResidualLoweringType(
1600 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1601 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1602 unsigned SrcAlign, unsigned DestAlign,
1603 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1604
1605 /// \returns True if the two functions have compatible attributes for inlining
1606 /// purposes.
1607 bool areInlineCompatible(const Function *Caller,
1608 const Function *Callee) const;
1609
1610 /// Returns a penalty for invoking call \p Call in \p F.
1611 /// For example, if a function F calls a function G, which in turn calls
1612 /// function H, then getInlineCallPenalty(F, H()) would return the
1613 /// penalty of calling H from F, e.g. after inlining G into F.
1614 /// \p DefaultCallPenalty is passed to give a default penalty that
1615 /// the target can amend or override.
1616 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1617 unsigned DefaultCallPenalty) const;
1618
1619 /// \returns True if the caller and callee agree on how \p Types will be
1620 /// passed to or returned from the callee.
1622 /// \param Types List of types to check.
1623 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1624 const ArrayRef<Type *> &Types) const;
1625
1626 /// The type of load/store indexing.
1627 enum MemIndexedMode {
1628 MIM_Unindexed, ///< No indexing.
1629 MIM_PreInc, ///< Pre-incrementing.
1630 MIM_PreDec, ///< Pre-decrementing.
1631 MIM_PostInc, ///< Post-incrementing.
1632 MIM_PostDec ///< Post-decrementing.
1633 };
1634
1635 /// \returns True if the specified indexed load for the given type is legal.
1636 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1637
1638 /// \returns True if the specified indexed store for the given type is legal.
1639 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1640
1641 /// \returns The bitwidth of the largest vector type that should be used to
1642 /// load/store in the given address space.
1643 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1644
1645 /// \returns True if the load instruction is legal to vectorize.
1646 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1647
1648 /// \returns True if the store instruction is legal to vectorize.
1649 bool isLegalToVectorizeStore(StoreInst *SI) const;
1650
1651 /// \returns True if it is legal to vectorize the given load chain.
1652 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1653 unsigned AddrSpace) const;
1654
1655 /// \returns True if it is legal to vectorize the given store chain.
1656 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1657 unsigned AddrSpace) const;
1658
1659 /// \returns True if it is legal to vectorize the given reduction kind.
1660 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1661 ElementCount VF) const;
1662
1663 /// \returns True if the given type is supported for scalable vectors
1664 bool isElementTypeLegalForScalableVector(Type *Ty) const;
1665
1666 /// \returns The new vector factor value if the target doesn't support \p
1667 /// SizeInBytes loads or has a better vector factor.
1668 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1669 unsigned ChainSizeInBytes,
1670 VectorType *VecTy) const;
1671
1672 /// \returns The new vector factor value if the target doesn't support \p
1673 /// SizeInBytes stores or has a better vector factor.
1674 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1675 unsigned ChainSizeInBytes,
1676 VectorType *VecTy) const;
1677
1678 /// Flags describing the kind of vector reduction.
1679 struct ReductionFlags {
1680 ReductionFlags() = default;
1681 bool IsMaxOp =
1682 false; ///< If the op is a min/max kind, true if it's a max operation.
1683 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1684 bool NoNaN =
1685 false; ///< If op is an fp min/max, whether NaNs may be present.
1686 };
1687
1688 /// \returns True if the target prefers fixed width vectorization if the
1689 /// loop vectorizer's cost-model assigns an equal cost to the fixed and
1690 /// scalable version of the vectorized loop.
1691 bool preferFixedOverScalableIfEqualCost() const;
1692
1693 /// \returns True if the target prefers reductions in loop.
1694 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1695 ReductionFlags Flags) const;
1696
1697 /// \returns True if the target prefers the reduction select kept in the loop
1698 /// when tail folding, i.e.
1699 /// loop:
1700 /// p = phi (0, s)
1701 /// a = add (p, x)
1702 /// s = select (mask, a, p)
1703 /// vecreduce.add(s)
1704 ///
1705 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1706 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1707 /// by the target, this can lead to cleaner code generation.
1708 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1709 ReductionFlags Flags) const;
1710
1711 /// Return true if the loop vectorizer should consider vectorizing an
1712 /// otherwise scalar epilogue loop.
1713 bool preferEpilogueVectorization() const;
1714
1715 /// \returns True if the target wants to expand the given reduction intrinsic
1716 /// into a shuffle sequence.
1717 bool shouldExpandReduction(const IntrinsicInst *II) const;
1718
1719 enum struct ReductionShuffle { SplitHalf, Pairwise };
1720
1721 /// \returns The shuffle sequence pattern used to expand the given reduction
1722 /// intrinsic.
1723 ReductionShuffle
1724 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const;
1725
1726 /// \returns the size cost of rematerializing a GlobalValue address relative
1727 /// to a stack reload.
1728 unsigned getGISelRematGlobalCost() const;
1729
1730 /// \returns the lower bound of a trip count to decide on vectorization
1731 /// while tail-folding.
1732 unsigned getMinTripCountTailFoldingThreshold() const;
1733
1734 /// \returns True if the target supports scalable vectors.
1735 bool supportsScalableVectors() const;
1736
1737 /// \return true when scalable vectorization is preferred.
1738 bool enableScalableVectorization() const;
1739
1740 /// \name Vector Predication Information
1741 /// @{
1742 /// Whether the target supports the %evl parameter of VP intrinsics efficiently
1743 /// in hardware, for the given opcode and type/alignment (see LLVM Language
1744 /// Reference - "Vector Predication Intrinsics").
1745 /// Use of %evl is discouraged when that is not the case.
1746 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1747 Align Alignment) const;
1748
1749 struct VPLegalization {
1750 enum VPTransform {
1751 // keep the predicating parameter
1752 Legal = 0,
1753 // where legal, discard the predicate parameter
1754 Discard = 1,
1755 // transform into something else that is also predicating
1756 Convert = 2
1757 };
1758
1759 // How to transform the EVL parameter.
1760 // Legal: keep the EVL parameter as it is.
1761 // Discard: Ignore the EVL parameter where it is safe to do so.
1762 // Convert: Fold the EVL into the mask parameter.
1763 VPTransform EVLParamStrategy;
1764
1765 // How to transform the operator.
1766 // Legal: The target supports this operator.
1767 // Convert: Convert this to a non-VP operation.
1768 // The 'Discard' strategy is invalid.
1769 VPTransform OpStrategy;
1770
1771 bool shouldDoNothing() const {
1772 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1773 }
1774 VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1775 : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1776 };
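// Illustrative usage sketch (assumes TargetTransformInfo &TTI and a
// VPIntrinsic &VPI; see getVPLegalizationStrategy below):
//
//   TargetTransformInfo::VPLegalization VPLegal =
//       TTI.getVPLegalizationStrategy(VPI);
//   if (VPLegal.shouldDoNothing()) {
//     // Both the %evl parameter and the operator are supported as-is.
//   } else if (VPLegal.OpStrategy ==
//              TargetTransformInfo::VPLegalization::Convert) {
//     // The VP operation should be lowered to a non-VP equivalent.
//   }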
1777
1778 /// \returns How the target needs this vector-predicated operation to be
1779 /// transformed.
1780 VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1781 /// @}
1782
1783 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1784 /// state.
1785 ///
1786 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1787 /// node containing a jump table in a format suitable for the target, so it
1788 /// needs to know what format of jump table it can legally use.
1789 ///
1790 /// For non-Arm targets, this function isn't used. It defaults to returning
1791 /// false, but it shouldn't matter what it returns anyway.
1792 bool hasArmWideBranch(bool Thumb) const;
1793
1794 /// \return The maximum number of function arguments the target supports.
1795 unsigned getMaxNumArgs() const;
1796
1797 /// @}
1798
1799private:
1800 /// The abstract base class used to type erase specific TTI
1801 /// implementations.
1802 class Concept;
1803
1804 /// The template model for the base class which wraps a concrete
1805 /// implementation in a type erased interface.
1806 template <typename T> class Model;
1807
1808 std::unique_ptr<Concept> TTIImpl;
1809};
1810
1811 class TargetTransformInfo::Concept {
1812 public:
1813 virtual ~Concept() = 0;
1814 virtual const DataLayout &getDataLayout() const = 0;
1815 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1817 Type *AccessType,
1819 virtual InstructionCost
1821 const TTI::PointersChainInfo &Info, Type *AccessTy,
1823 virtual unsigned getInliningThresholdMultiplier() const = 0;
1825 virtual unsigned
1827 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1828 virtual int getInlinerVectorBonusPercent() const = 0;
1829 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1830 const AllocaInst *AI) const = 0;
1833 virtual unsigned
1835 ProfileSummaryInfo *PSI,
1836 BlockFrequencyInfo *BFI) = 0;
1842 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1843 virtual bool isSourceOfDivergence(const Value *V) = 0;
1844 virtual bool isAlwaysUniform(const Value *V) = 0;
1845 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1846 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1847 virtual unsigned getFlatAddressSpace() = 0;
1849 Intrinsic::ID IID) const = 0;
1850 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1851 virtual bool
1853 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1854 virtual bool isSingleThreaded() const = 0;
1855 virtual std::pair<const Value *, unsigned>
1856 getPredicatedAddrSpace(const Value *V) const = 0;
1858 Value *OldV,
1859 Value *NewV) const = 0;
1860 virtual bool isLoweredToCall(const Function *F) = 0;
1863 OptimizationRemarkEmitter *ORE) = 0;
1865 PeelingPreferences &PP) = 0;
1867 AssumptionCache &AC,
1868 TargetLibraryInfo *LibInfo,
1869 HardwareLoopInfo &HWLoopInfo) = 0;
1871 virtual TailFoldingStyle
1872 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1873 virtual std::optional<Instruction *> instCombineIntrinsic(
1874 InstCombiner &IC, IntrinsicInst &II) = 0;
1875 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1876 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1877 KnownBits & Known, bool &KnownBitsComputed) = 0;
1878 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1879 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1880 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1881 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1882 SimplifyAndSetOp) = 0;
1883 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1884 virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
1885 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1886 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1887 int64_t BaseOffset, bool HasBaseReg,
1888 int64_t Scale, unsigned AddrSpace,
1889 Instruction *I,
1890 int64_t ScalableOffset) = 0;
1892 const TargetTransformInfo::LSRCost &C2) = 0;
1893 virtual bool isNumRegsMajorCostOfLSR() = 0;
1897 virtual bool canMacroFuseCmp() = 0;
1898 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1900 TargetLibraryInfo *LibInfo) = 0;
1901 virtual AddressingModeKind
1903 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1904 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1905 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1906 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1907 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1908 ElementCount NumElements) const = 0;
1909 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1910 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1912 Align Alignment) = 0;
1914 Align Alignment) = 0;
1915 virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) = 0;
1916 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) = 0;
1917 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0;
1918 virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) = 0;
1919 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1920 unsigned Opcode1,
1921 const SmallBitVector &OpcodeMask) const = 0;
1922 virtual bool enableOrderedReductions() = 0;
1923 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1924 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1927 StackOffset BaseOffset,
1928 bool HasBaseReg, int64_t Scale,
1929 unsigned AddrSpace) = 0;
1930 virtual bool LSRWithInstrQueries() = 0;
1931 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1933 virtual bool useAA() = 0;
1934 virtual bool isTypeLegal(Type *Ty) = 0;
1935 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1936 virtual bool shouldBuildLookupTables() = 0;
1938 virtual bool shouldBuildRelLookupTables() = 0;
1939 virtual bool useColdCCForColdCall(Function &F) = 0;
1941 const APInt &DemandedElts,
1942 bool Insert, bool Extract,
1944 virtual InstructionCost
1946 ArrayRef<Type *> Tys,
1949 virtual bool supportsTailCalls() = 0;
1950 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1951 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1953 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1954 virtual bool enableSelectOptimize() = 0;
1960 unsigned BitWidth,
1961 unsigned AddressSpace,
1962 Align Alignment,
1963 unsigned *Fast) = 0;
1964 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1965 virtual bool haveFastSqrt(Type *Ty) = 0;
1967 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1969 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1970 const APInt &Imm, Type *Ty) = 0;
1971 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1973 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1974 const APInt &Imm, Type *Ty,
1976 Instruction *Inst = nullptr) = 0;
1978 const APInt &Imm, Type *Ty,
1981 const Function &Fn) const = 0;
1982 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1983 virtual bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const = 0;
1984 virtual unsigned getRegisterClassForType(bool Vector,
1985 Type *Ty = nullptr) const = 0;
1986 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1988 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1989 virtual std::optional<unsigned> getMaxVScale() const = 0;
1990 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1991 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1992 virtual bool
1994 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1995 bool IsScalable) const = 0;
1996 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1997 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1998 Type *ScalarValTy) const = 0;
2000 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
2001 virtual unsigned getCacheLineSize() const = 0;
2002 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
2003 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
2004 const = 0;
2005 virtual std::optional<unsigned> getMinPageSize() const = 0;
2006
2007 /// \return How far ahead of a load we should place the prefetch
2008 /// instruction. This is currently measured in number of
2009 /// instructions.
2010 virtual unsigned getPrefetchDistance() const = 0;
2011
2012 /// \return Some HW prefetchers can handle accesses up to a certain
2013 /// constant stride. This is the minimum stride in bytes where it
2014 /// makes sense to start adding SW prefetches. The default is 1,
2015 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
2016 /// even below the HW prefetcher limit, and the arguments provided are
2017 /// meant to serve as a basis for deciding this for a particular loop.
2018 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2019 unsigned NumStridedMemAccesses,
2020 unsigned NumPrefetches,
2021 bool HasCall) const = 0;
2022
2023 /// \return The maximum number of iterations to prefetch ahead. If
2024 /// the required number of iterations is more than this number, no
2025 /// prefetching is performed.
2026 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
2027
2028 /// \return True if prefetching should also be done for writes.
2029 virtual bool enableWritePrefetching() const = 0;
2030
2031 /// \return True if the target wants to issue a prefetch in address space \p AS.
2032 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
2033
2034 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
2036 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2037 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2038 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
2040 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2041 const SmallBitVector &OpcodeMask,
2043
2044 virtual InstructionCost
2047 ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
2048 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
2049 Type *Src, CastContextHint CCH,
2051 const Instruction *I) = 0;
2052 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2053 VectorType *VecTy,
2054 unsigned Index) = 0;
2055 virtual InstructionCost getCFInstrCost(unsigned Opcode,
2057 const Instruction *I = nullptr) = 0;
2058 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
2059 Type *CondTy,
2060 CmpInst::Predicate VecPred,
2062 const Instruction *I) = 0;
2063 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2065 unsigned Index, Value *Op0,
2066 Value *Op1) = 0;
2069 unsigned Index) = 0;
2070
2071 virtual InstructionCost
2072 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2073 const APInt &DemandedDstElts,
2075
2076 virtual InstructionCost
2077 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2079 OperandValueInfo OpInfo, const Instruction *I) = 0;
2080 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2081 Align Alignment,
2082 unsigned AddressSpace,
2084 const Instruction *I) = 0;
2085 virtual InstructionCost
2086 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2087 unsigned AddressSpace,
2089 virtual InstructionCost
2090 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2091 bool VariableMask, Align Alignment,
2093 const Instruction *I = nullptr) = 0;
2094 virtual InstructionCost
2095 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2096 bool VariableMask, Align Alignment,
2098 const Instruction *I = nullptr) = 0;
2099
2101 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2102 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2103 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2104 virtual InstructionCost
2106 std::optional<FastMathFlags> FMF,
2108 virtual InstructionCost
2112 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2113 FastMathFlags FMF,
2116 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2118 virtual InstructionCost
2122 ArrayRef<Type *> Tys,
2124 virtual unsigned getNumberOfParts(Type *Tp) = 0;
2125 virtual InstructionCost
2127 virtual InstructionCost
2130 MemIntrinsicInfo &Info) = 0;
2131 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2133 Type *ExpectedType) = 0;
2135 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2136 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2137 std::optional<uint32_t> AtomicElementSize) const = 0;
2138
2140 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2141 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2142 unsigned SrcAlign, unsigned DestAlign,
2143 std::optional<uint32_t> AtomicCpySize) const = 0;
2144 virtual bool areInlineCompatible(const Function *Caller,
2145 const Function *Callee) const = 0;
2146 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2147 unsigned DefaultCallPenalty) const = 0;
2148 virtual bool areTypesABICompatible(const Function *Caller,
2149 const Function *Callee,
2150 const ArrayRef<Type *> &Types) const = 0;
2151 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2152 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2153 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2154 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2155 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2156 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2157 Align Alignment,
2158 unsigned AddrSpace) const = 0;
2159 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2160 Align Alignment,
2161 unsigned AddrSpace) const = 0;
2163 ElementCount VF) const = 0;
2164 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2165 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2166 unsigned ChainSizeInBytes,
2167 VectorType *VecTy) const = 0;
2168 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2169 unsigned ChainSizeInBytes,
2170 VectorType *VecTy) const = 0;
2171 virtual bool preferFixedOverScalableIfEqualCost() const = 0;
2172 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2173 ReductionFlags) const = 0;
2174 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2175 ReductionFlags) const = 0;
2176 virtual bool preferEpilogueVectorization() const = 0;
2177
2178 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2179 virtual ReductionShuffle
2181 virtual unsigned getGISelRematGlobalCost() const = 0;
2182 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2183 virtual bool enableScalableVectorization() const = 0;
2184 virtual bool supportsScalableVectors() const = 0;
2185 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2186 Align Alignment) const = 0;
2187 virtual VPLegalization
2189 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2190 virtual unsigned getMaxNumArgs() const = 0;
2191};
2192
2193template <typename T>
2194class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2195 T Impl;
2196
2197public:
2198 Model(T Impl) : Impl(std::move(Impl)) {}
2199 ~Model() override = default;
2200
2201 const DataLayout &getDataLayout() const override {
2202 return Impl.getDataLayout();
2203 }
2204
2205 InstructionCost
2206 getGEPCost(Type *PointeeType, const Value *Ptr,
2207 ArrayRef<const Value *> Operands, Type *AccessType,
2209 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2210 }
2211 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2212 const Value *Base,
2213 const PointersChainInfo &Info,
2214 Type *AccessTy,
2215 TargetCostKind CostKind) override {
2216 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2217 }
2218 unsigned getInliningThresholdMultiplier() const override {
2219 return Impl.getInliningThresholdMultiplier();
2220 }
2221 unsigned adjustInliningThreshold(const CallBase *CB) override {
2222 return Impl.adjustInliningThreshold(CB);
2223 }
2224 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2225 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2226 }
2227 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2228 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2229 }
2230 int getInlinerVectorBonusPercent() const override {
2231 return Impl.getInlinerVectorBonusPercent();
2232 }
2233 unsigned getCallerAllocaCost(const CallBase *CB,
2234 const AllocaInst *AI) const override {
2235 return Impl.getCallerAllocaCost(CB, AI);
2236 }
2237 InstructionCost getMemcpyCost(const Instruction *I) override {
2238 return Impl.getMemcpyCost(I);
2239 }
2240
2241 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2242 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2243 }
2244
2245 InstructionCost getInstructionCost(const User *U,
2246 ArrayRef<const Value *> Operands,
2247 TargetCostKind CostKind) override {
2248 return Impl.getInstructionCost(U, Operands, CostKind);
2249 }
2250 BranchProbability getPredictableBranchThreshold() override {
2251 return Impl.getPredictableBranchThreshold();
2252 }
2253 InstructionCost getBranchMispredictPenalty() override {
2254 return Impl.getBranchMispredictPenalty();
2255 }
2256 bool hasBranchDivergence(const Function *F = nullptr) override {
2257 return Impl.hasBranchDivergence(F);
2258 }
2259 bool isSourceOfDivergence(const Value *V) override {
2260 return Impl.isSourceOfDivergence(V);
2261 }
2262
2263 bool isAlwaysUniform(const Value *V) override {
2264 return Impl.isAlwaysUniform(V);
2265 }
2266
2267 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2268 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2269 }
2270
2271 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2272 return Impl.addrspacesMayAlias(AS0, AS1);
2273 }
2274
2275 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2276
2277 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2278 Intrinsic::ID IID) const override {
2279 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2280 }
2281
2282 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2283 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2284 }
2285
2286 bool
2287 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2288 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2289 }
2290
2291 unsigned getAssumedAddrSpace(const Value *V) const override {
2292 return Impl.getAssumedAddrSpace(V);
2293 }
2294
2295 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2296
2297 std::pair<const Value *, unsigned>
2298 getPredicatedAddrSpace(const Value *V) const override {
2299 return Impl.getPredicatedAddrSpace(V);
2300 }
2301
2302 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2303 Value *NewV) const override {
2304 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2305 }
2306
2307 bool isLoweredToCall(const Function *F) override {
2308 return Impl.isLoweredToCall(F);
2309 }
2310 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2311 UnrollingPreferences &UP,
2312 OptimizationRemarkEmitter *ORE) override {
2313 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2314 }
2315 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2316 PeelingPreferences &PP) override {
2317 return Impl.getPeelingPreferences(L, SE, PP);
2318 }
2319 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2320 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2321 HardwareLoopInfo &HWLoopInfo) override {
2322 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2323 }
2324 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2325 return Impl.preferPredicateOverEpilogue(TFI);
2326 }
2328 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2329 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2330 }
2331 std::optional<Instruction *>
2332 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2333 return Impl.instCombineIntrinsic(IC, II);
2334 }
2335 std::optional<Value *>
2336 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2337 APInt DemandedMask, KnownBits &Known,
2338 bool &KnownBitsComputed) override {
2339 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2340 KnownBitsComputed);
2341 }
2342 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2343 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2344 APInt &UndefElts2, APInt &UndefElts3,
2345 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2346 SimplifyAndSetOp) override {
2347 return Impl.simplifyDemandedVectorEltsIntrinsic(
2348 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2349 SimplifyAndSetOp);
2350 }
2351 bool isLegalAddImmediate(int64_t Imm) override {
2352 return Impl.isLegalAddImmediate(Imm);
2353 }
2354 bool isLegalAddScalableImmediate(int64_t Imm) override {
2355 return Impl.isLegalAddScalableImmediate(Imm);
2356 }
2357 bool isLegalICmpImmediate(int64_t Imm) override {
2358 return Impl.isLegalICmpImmediate(Imm);
2359 }
2360 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2361 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2362 Instruction *I, int64_t ScalableOffset) override {
2363 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2364 AddrSpace, I, ScalableOffset);
2365 }
2366 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2367 const TargetTransformInfo::LSRCost &C2) override {
2368 return Impl.isLSRCostLess(C1, C2);
2369 }
2370 bool isNumRegsMajorCostOfLSR() override {
2371 return Impl.isNumRegsMajorCostOfLSR();
2372 }
2373 bool shouldFoldTerminatingConditionAfterLSR() const override {
2374 return Impl.shouldFoldTerminatingConditionAfterLSR();
2375 }
2376 bool shouldDropLSRSolutionIfLessProfitable() const override {
2377 return Impl.shouldDropLSRSolutionIfLessProfitable();
2378 }
2379 bool isProfitableLSRChainElement(Instruction *I) override {
2380 return Impl.isProfitableLSRChainElement(I);
2381 }
2382 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2383 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2384 DominatorTree *DT, AssumptionCache *AC,
2385 TargetLibraryInfo *LibInfo) override {
2386 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2387 }
2389 getPreferredAddressingMode(const Loop *L,
2390 ScalarEvolution *SE) const override {
2391 return Impl.getPreferredAddressingMode(L, SE);
2392 }
2393 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2394 return Impl.isLegalMaskedStore(DataType, Alignment);
2395 }
2396 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2397 return Impl.isLegalMaskedLoad(DataType, Alignment);
2398 }
2399 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2400 return Impl.isLegalNTStore(DataType, Alignment);
2401 }
2402 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2403 return Impl.isLegalNTLoad(DataType, Alignment);
2404 }
2405 bool isLegalBroadcastLoad(Type *ElementTy,
2406 ElementCount NumElements) const override {
2407 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2408 }
2409 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2410 return Impl.isLegalMaskedScatter(DataType, Alignment);
2411 }
2412 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2413 return Impl.isLegalMaskedGather(DataType, Alignment);
2414 }
2415 bool forceScalarizeMaskedGather(VectorType *DataType,
2416 Align Alignment) override {
2417 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2418 }
2419 bool forceScalarizeMaskedScatter(VectorType *DataType,
2420 Align Alignment) override {
2421 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2422 }
2423 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
2424 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2425 }
2426 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
2427 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2428 }
2429 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
2430 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2431 }
2432 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
2433 return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
2434 }
2435 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2436 const SmallBitVector &OpcodeMask) const override {
2437 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2438 }
2439 bool enableOrderedReductions() override {
2440 return Impl.enableOrderedReductions();
2441 }
2442 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2443 return Impl.hasDivRemOp(DataType, IsSigned);
2444 }
2445 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2446 return Impl.hasVolatileVariant(I, AddrSpace);
2447 }
2448 bool prefersVectorizedAddressing() override {
2449 return Impl.prefersVectorizedAddressing();
2450 }
2451 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2452 StackOffset BaseOffset, bool HasBaseReg,
2453 int64_t Scale,
2454 unsigned AddrSpace) override {
2455 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2456 AddrSpace);
2457 }
2458 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2459 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2460 return Impl.isTruncateFree(Ty1, Ty2);
2461 }
2462 bool isProfitableToHoist(Instruction *I) override {
2463 return Impl.isProfitableToHoist(I);
2464 }
2465 bool useAA() override { return Impl.useAA(); }
2466 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2467 unsigned getRegUsageForType(Type *Ty) override {
2468 return Impl.getRegUsageForType(Ty);
2469 }
2470 bool shouldBuildLookupTables() override {
2471 return Impl.shouldBuildLookupTables();
2472 }
2473 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2474 return Impl.shouldBuildLookupTablesForConstant(C);
2475 }
2476 bool shouldBuildRelLookupTables() override {
2477 return Impl.shouldBuildRelLookupTables();
2478 }
2479 bool useColdCCForColdCall(Function &F) override {
2480 return Impl.useColdCCForColdCall(F);
2481 }
2482
2483 InstructionCost getScalarizationOverhead(VectorType *Ty,
2484 const APInt &DemandedElts,
2485 bool Insert, bool Extract,
2486 TargetCostKind CostKind) override {
2487 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2488 CostKind);
2489 }
2490 InstructionCost
2491 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2492 ArrayRef<Type *> Tys,
2493 TargetCostKind CostKind) override {
2494 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2495 }
2496
2497 bool supportsEfficientVectorElementLoadStore() override {
2498 return Impl.supportsEfficientVectorElementLoadStore();
2499 }
2500
2501 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2502 bool supportsTailCallFor(const CallBase *CB) override {
2503 return Impl.supportsTailCallFor(CB);
2504 }
2505
2506 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2507 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2508 }
2509 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2510 bool IsZeroCmp) const override {
2511 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2512 }
2513 bool enableSelectOptimize() override {
2514 return Impl.enableSelectOptimize();
2515 }
2516 bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
2517 return Impl.shouldTreatInstructionLikeSelect(I);
2518 }
2519 bool enableInterleavedAccessVectorization() override {
2520 return Impl.enableInterleavedAccessVectorization();
2521 }
2522 bool enableMaskedInterleavedAccessVectorization() override {
2523 return Impl.enableMaskedInterleavedAccessVectorization();
2524 }
2525 bool isFPVectorizationPotentiallyUnsafe() override {
2526 return Impl.isFPVectorizationPotentiallyUnsafe();
2527 }
2528 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2529 unsigned AddressSpace, Align Alignment,
2530 unsigned *Fast) override {
2531 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2532 Alignment, Fast);
2533 }
2534 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2535 return Impl.getPopcntSupport(IntTyWidthInBit);
2536 }
2537 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2538
2539 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2540 return Impl.isExpensiveToSpeculativelyExecute(I);
2541 }
2542
2543 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2544 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2545 }
2546
2547 InstructionCost getFPOpCost(Type *Ty) override {
2548 return Impl.getFPOpCost(Ty);
2549 }
2550
2551 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2552 const APInt &Imm, Type *Ty) override {
2553 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2554 }
2555 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2556 TargetCostKind CostKind) override {
2557 return Impl.getIntImmCost(Imm, Ty, CostKind);
2558 }
2559 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2560 const APInt &Imm, Type *Ty,
2562 Instruction *Inst = nullptr) override {
2563 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2564 }
2565 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2566 const APInt &Imm, Type *Ty,
2567 TargetCostKind CostKind) override {
2568 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2569 }
2570 bool preferToKeepConstantsAttached(const Instruction &Inst,
2571 const Function &Fn) const override {
2572 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2573 }
2574 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2575 return Impl.getNumberOfRegisters(ClassID);
2576 }
2577 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
2578 return Impl.hasConditionalLoadStoreForType(Ty);
2579 }
2580 unsigned getRegisterClassForType(bool Vector,
2581 Type *Ty = nullptr) const override {
2582 return Impl.getRegisterClassForType(Vector, Ty);
2583 }
2584 const char *getRegisterClassName(unsigned ClassID) const override {
2585 return Impl.getRegisterClassName(ClassID);
2586 }
2587 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2588 return Impl.getRegisterBitWidth(K);
2589 }
2590 unsigned getMinVectorRegisterBitWidth() const override {
2591 return Impl.getMinVectorRegisterBitWidth();
2592 }
2593 std::optional<unsigned> getMaxVScale() const override {
2594 return Impl.getMaxVScale();
2595 }
2596 std::optional<unsigned> getVScaleForTuning() const override {
2597 return Impl.getVScaleForTuning();
2598 }
2599 bool isVScaleKnownToBeAPowerOfTwo() const override {
2600 return Impl.isVScaleKnownToBeAPowerOfTwo();
2601 }
2602 bool shouldMaximizeVectorBandwidth(
2603 TargetTransformInfo::RegisterKind K) const override {
2604 return Impl.shouldMaximizeVectorBandwidth(K);
2605 }
2606 ElementCount getMinimumVF(unsigned ElemWidth,
2607 bool IsScalable) const override {
2608 return Impl.getMinimumVF(ElemWidth, IsScalable);
2609 }
2610 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2611 return Impl.getMaximumVF(ElemWidth, Opcode);
2612 }
2613 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2614 Type *ScalarValTy) const override {
2615 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2616 }
2617 bool shouldConsiderAddressTypePromotion(
2618 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2619 return Impl.shouldConsiderAddressTypePromotion(
2620 I, AllowPromotionWithoutCommonHeader);
2621 }
2622 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2623 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2624 return Impl.getCacheSize(Level);
2625 }
2626 std::optional<unsigned>
2627 getCacheAssociativity(CacheLevel Level) const override {
2628 return Impl.getCacheAssociativity(Level);
2629 }
2630
2631 std::optional<unsigned> getMinPageSize() const override {
2632 return Impl.getMinPageSize();
2633 }
2634
2635 /// Return the preferred prefetch distance in terms of instructions.
2636 ///
2637 unsigned getPrefetchDistance() const override {
2638 return Impl.getPrefetchDistance();
2639 }
2640
2641 /// Return the minimum stride necessary to trigger software
2642 /// prefetching.
2643 ///
2644 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2645 unsigned NumStridedMemAccesses,
2646 unsigned NumPrefetches,
2647 bool HasCall) const override {
2648 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2649 NumPrefetches, HasCall);
2650 }
2651
2652 /// Return the maximum prefetch distance in terms of loop
2653 /// iterations.
2654 ///
2655 unsigned getMaxPrefetchIterationsAhead() const override {
2656 return Impl.getMaxPrefetchIterationsAhead();
2657 }
2658
2659 /// \return True if prefetching should also be done for writes.
2660 bool enableWritePrefetching() const override {
2661 return Impl.enableWritePrefetching();
2662 }
2663
2664 /// \return True if the target wants to issue a prefetch in address space \p AS.
2665 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2666 return Impl.shouldPrefetchAddressSpace(AS);
2667 }
2668
2669 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2670 return Impl.getMaxInterleaveFactor(VF);
2671 }
2672 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2673 unsigned &JTSize,
2674 ProfileSummaryInfo *PSI,
2675 BlockFrequencyInfo *BFI) override {
2676 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2677 }
2678 InstructionCost getArithmeticInstrCost(
2679 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2680 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2681 ArrayRef<const Value *> Args,
2682 const Instruction *CxtI = nullptr) override {
2683 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2684 Args, CxtI);
2685 }
2686 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2687 unsigned Opcode1,
2688 const SmallBitVector &OpcodeMask,
2689 TTI::TargetCostKind CostKind) const override {
2690 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2691 }
2692
2693 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2694 ArrayRef<int> Mask,
2696 VectorType *SubTp,
2697 ArrayRef<const Value *> Args,
2698 const Instruction *CxtI) override {
2699 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
2700 CxtI);
2701 }
2702 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2703 CastContextHint CCH,
2705 const Instruction *I) override {
2706 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2707 }
2708 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2709 VectorType *VecTy,
2710 unsigned Index) override {
2711 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2712 }
2713 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2714 const Instruction *I = nullptr) override {
2715 return Impl.getCFInstrCost(Opcode, CostKind, I);
2716 }
2717 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2718 CmpInst::Predicate VecPred,
2720 const Instruction *I) override {
2721 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2722 }
2723 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2725 unsigned Index, Value *Op0,
2726 Value *Op1) override {
2727 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2728 }
2729 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2731 unsigned Index) override {
2732 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2733 }
2734 InstructionCost
2735 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2736 const APInt &DemandedDstElts,
2737 TTI::TargetCostKind CostKind) override {
2738 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2739 DemandedDstElts, CostKind);
2740 }
2741 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2742 unsigned AddressSpace,
2744 OperandValueInfo OpInfo,
2745 const Instruction *I) override {
2746 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2747 OpInfo, I);
2748 }
2749 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2750 unsigned AddressSpace,
2752 const Instruction *I) override {
2753 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2754 CostKind, I);
2755 }
2756 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2757 Align Alignment, unsigned AddressSpace,
2758 TTI::TargetCostKind CostKind) override {
2759 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2760 CostKind);
2761 }
2762 InstructionCost
2763 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2764 bool VariableMask, Align Alignment,
2766 const Instruction *I = nullptr) override {
2767 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2768 Alignment, CostKind, I);
2769 }
2770 InstructionCost
2771 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2772 bool VariableMask, Align Alignment,
2774 const Instruction *I = nullptr) override {
2775 return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
2776 Alignment, CostKind, I);
2777 }
2778 InstructionCost getInterleavedMemoryOpCost(
2779 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2780 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2781 bool UseMaskForCond, bool UseMaskForGaps) override {
2782 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2783 Alignment, AddressSpace, CostKind,
2784 UseMaskForCond, UseMaskForGaps);
2785 }
2786 InstructionCost
2787 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2788 std::optional<FastMathFlags> FMF,
2789 TTI::TargetCostKind CostKind) override {
2790 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2791 }
2792 InstructionCost
2793 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2794 TTI::TargetCostKind CostKind) override {
2795 return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
2796 }
2797 InstructionCost
2798 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2799 VectorType *Ty, FastMathFlags FMF,
2800 TTI::TargetCostKind CostKind) override {
2801 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2802 CostKind);
2803 }
2804 InstructionCost
2805 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2806 TTI::TargetCostKind CostKind) override {
2807 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2808 }
2809 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2810 TTI::TargetCostKind CostKind) override {
2811 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2812 }
2813 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2814 ArrayRef<Type *> Tys,
2815 TTI::TargetCostKind CostKind) override {
2816 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2817 }
2818 unsigned getNumberOfParts(Type *Tp) override {
2819 return Impl.getNumberOfParts(Tp);
2820 }
2821 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2822 const SCEV *Ptr) override {
2823 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2824 }
2825 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2826 return Impl.getCostOfKeepingLiveOverCall(Tys);
2827 }
2828 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2829 MemIntrinsicInfo &Info) override {
2830 return Impl.getTgtMemIntrinsic(Inst, Info);
2831 }
2832 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2833 return Impl.getAtomicMemIntrinsicMaxElementSize();
2834 }
2835 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2836 Type *ExpectedType) override {
2837 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2838 }
2839 Type *getMemcpyLoopLoweringType(
2840 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2841 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2842 std::optional<uint32_t> AtomicElementSize) const override {
2843 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2844 DestAddrSpace, SrcAlign, DestAlign,
2845 AtomicElementSize);
2846 }
2847 void getMemcpyLoopResidualLoweringType(
2848 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2849 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2850 unsigned SrcAlign, unsigned DestAlign,
2851 std::optional<uint32_t> AtomicCpySize) const override {
2852 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2853 SrcAddrSpace, DestAddrSpace,
2854 SrcAlign, DestAlign, AtomicCpySize);
2855 }
2856 bool areInlineCompatible(const Function *Caller,
2857 const Function *Callee) const override {
2858 return Impl.areInlineCompatible(Caller, Callee);
2859 }
2860 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2861 unsigned DefaultCallPenalty) const override {
2862 return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
2863 }
2864 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2865 const ArrayRef<Type *> &Types) const override {
2866 return Impl.areTypesABICompatible(Caller, Callee, Types);
2867 }
2868 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2869 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2870 }
2871 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2872 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2873 }
2874 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2875 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2876 }
2877 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2878 return Impl.isLegalToVectorizeLoad(LI);
2879 }
2880 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2881 return Impl.isLegalToVectorizeStore(SI);
2882 }
2883 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2884 unsigned AddrSpace) const override {
2885 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2886 AddrSpace);
2887 }
2888 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2889 unsigned AddrSpace) const override {
2890 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2891 AddrSpace);
2892 }
2893 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2894 ElementCount VF) const override {
2895 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2896 }
2897 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2898 return Impl.isElementTypeLegalForScalableVector(Ty);
2899 }
2900 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2901 unsigned ChainSizeInBytes,
2902 VectorType *VecTy) const override {
2903 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2904 }
2905 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2906 unsigned ChainSizeInBytes,
2907 VectorType *VecTy) const override {
2908 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2909 }
2910 bool preferFixedOverScalableIfEqualCost() const override {
2911 return Impl.preferFixedOverScalableIfEqualCost();
2912 }
2913 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2914 ReductionFlags Flags) const override {
2915 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2916 }
2917 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2918 ReductionFlags Flags) const override {
2919 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2920 }
2921 bool preferEpilogueVectorization() const override {
2922 return Impl.preferEpilogueVectorization();
2923 }
2924
2925 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2926 return Impl.shouldExpandReduction(II);
2927 }
2928
2929 ReductionShuffle
2930 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
2931 return Impl.getPreferredExpandedReductionShuffle(II);
2932 }
2933
2934 unsigned getGISelRematGlobalCost() const override {
2935 return Impl.getGISelRematGlobalCost();
2936 }
2937
2938 unsigned getMinTripCountTailFoldingThreshold() const override {
2939 return Impl.getMinTripCountTailFoldingThreshold();
2940 }
2941
2942 bool supportsScalableVectors() const override {
2943 return Impl.supportsScalableVectors();
2944 }
2945
2946 bool enableScalableVectorization() const override {
2947 return Impl.enableScalableVectorization();
2948 }
2949
2950 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2951 Align Alignment) const override {
2952 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2953 }
2954
2955 VPLegalization
2956 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2957 return Impl.getVPLegalizationStrategy(PI);
2958 }
2959
2960 bool hasArmWideBranch(bool Thumb) const override {
2961 return Impl.hasArmWideBranch(Thumb);
2962 }
2963
2964 unsigned getMaxNumArgs() const override {
2965 return Impl.getMaxNumArgs();
2966 }
2967};
2968
2969template <typename T>
2970TargetTransformInfo::TargetTransformInfo(T Impl)
2971 : TTIImpl(new Model<T>(Impl)) {}
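// Editorial sketch (not part of the original header): the templated
// constructor above is what type-erases a concrete TTI implementation behind
// the Concept/Model pair. Assuming a hypothetical implementation type
// `MyTargetTTIImpl` that provides the Concept API, usage looks roughly like:
//
//   MyTargetTTIImpl Impl(TM, F);    // target-specific cost model (hypothetical)
//   TargetTransformInfo TTI(Impl);  // stored internally as Model<MyTargetTTIImpl>
//   InstructionCost C = TTI.getCFInstrCost(Instruction::Br); // query through the erased interface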
2972
2973/// Analysis pass providing the \c TargetTransformInfo.
2974///
2975/// The core idea of the TargetIRAnalysis is to expose an interface through
2976/// which LLVM targets can analyze and provide information about the middle
2977/// end's target-independent IR. This supports use cases such as target-aware
2978/// cost modeling of IR constructs.
2979///
2980/// This is a function analysis because much of the cost modeling for targets
2981/// is done in a subtarget-specific way, and LLVM supports compiling different
2982/// functions for different subtargets in order to support runtime dispatch
2983/// according to the observed subtarget.
2984class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2985public:
2986 typedef TargetTransformInfo Result;
2987
2988 /// Default construct a target IR analysis.
2989 ///
2990 /// This will use the module's datalayout to construct a baseline
2991 /// conservative TTI result.
2992 TargetIRAnalysis();
2993
2994 /// Construct an IR analysis pass around a target-provided callback.
2995 ///
2996 /// The callback will be called with a particular function for which the TTI
2997 /// is needed and must return a TTI object for that function.
2998 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2999
3000 // Value semantics. We spell out the constructors for MSVC.
3001 TargetIRAnalysis(const TargetIRAnalysis &Arg)
3002 : TTICallback(Arg.TTICallback) {}
3003 TargetIRAnalysis(TargetIRAnalysis &&Arg)
3004 : TTICallback(std::move(Arg.TTICallback)) {}
3005 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
3006 TTICallback = RHS.TTICallback;
3007 return *this;
3008 }
3009 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
3010 TTICallback = std::move(RHS.TTICallback);
3011 return *this;
3012 }
3013
3014 Result run(const Function &F, FunctionAnalysisManager &);
3015
3016private:
3017 friend AnalysisInfoMixin<TargetIRAnalysis>;
3018 static AnalysisKey Key;
3019
3020 /// The callback used to produce a result.
3021 ///
3022 /// We use a completely opaque callback so that targets can provide whatever
3023 /// mechanism they desire for constructing the TTI for a given function.
3024 ///
3025 /// FIXME: Should we really use std::function? It's relatively inefficient.
3026/// It might be possible to arrange for even stateful callbacks to outlive
3027/// the analysis and thus use a function_ref, which would be lighter weight.
3028/// This may also be less error-prone, as the callback is likely to reference
3029/// the external TargetMachine, and that reference must never dangle.
3030 std::function<Result(const Function &)> TTICallback;
3031
3032 /// Helper function used as the callback in the default constructor.
3033 static Result getDefaultTTI(const Function &F);
3034};
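// Editorial sketch (not part of the original header): how a new-pass-manager
// function pass typically obtains the TargetTransformInfo produced by
// TargetIRAnalysis. `MyPass` is a hypothetical pass, and the sketch assumes
// TargetIRAnalysis has already been registered with the analysis manager
// (for example via PassBuilder::registerFunctionAnalyses, which wires in the
// target's callback when a TargetMachine is available).
//
//   struct MyPass : PassInfoMixin<MyPass> {
//     PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
//       // The analysis result is a TargetTransformInfo specialized for F.
//       TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//       if (TTI.enableScalableVectorization()) {
//         // ... make a target-aware transformation decision ...
//       }
//       return PreservedAnalyses::all();
//     }
//   };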
3035
3036/// Wrapper pass for TargetTransformInfo.
3037///
3038/// This pass can be constructed from a TTI object which it stores internally
3039/// and is queried by passes.
3040class TargetTransformInfoWrapperPass : public ImmutablePass {
3041 TargetIRAnalysis TIRA;
3042 std::optional<TargetTransformInfo> TTI;
3043
3044 virtual void anchor();
3045
3046public:
3047 static char ID;
3048
3049 /// We must provide a default constructor for the pass but it should
3050 /// never be used.
3051 ///
3052 /// Use the constructor below or call one of the creation routines.
3053 TargetTransformInfoWrapperPass();
3054
3055 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
3056
3057 TargetTransformInfo &getTTI(const Function &F);
3058};
3059
3060/// Create an analysis pass wrapper around a TTI object.
3061///
3062/// This analysis pass just holds the TTI instance and makes it available to
3063/// clients.
3064ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
3065
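// Editorial sketch (not part of the original header): the legacy-pass-manager
// path through the wrapper pass declared above. `MyLegacyPass` is hypothetical;
// AnalysisUsage/addRequired/getAnalysis are the standard legacy-pass APIs, and
// getTTI(F) is the accessor declared on TargetTransformInfoWrapperPass.
//
//   struct MyLegacyPass : FunctionPass {
//     static char ID;
//     MyLegacyPass() : FunctionPass(ID) {}
//
//     void getAnalysisUsage(AnalysisUsage &AU) const override {
//       AU.addRequired<TargetTransformInfoWrapperPass>();
//       AU.setPreservesAll();
//     }
//
//     bool runOnFunction(Function &F) override {
//       TargetTransformInfo &TTI =
//           getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//       unsigned CacheLineSize = TTI.getCacheLineSize(); // example query
//       (void)CacheLineSize;
//       return false; // the IR is not modified in this sketch
//     }
//   };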
3066} // namespace llvm
3067
3068#endif
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
RelocType Type
Definition: COFFYAML.cpp:391
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine InstCombiner
uint64_t IntrinsicInst * II
This header defines various interfaces for pass management in LLVM.
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
This file implements the SmallBitVector class.
Value * RHS
Class for arbitrary precision integers.
Definition: APInt.h:78
an instruction to allocate memory on the stack
Definition: Instructions.h:61
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:282
The core instruction combiner logic.
Definition: InstCombiner.h:47
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Definition: DerivedTypes.h:40
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:612
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:290
Multiway switch.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
TargetTransformInfo & getTTI(const Function &F)
virtual bool preferFixedOverScalableIfEqualCost() const =0
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
virtual const DataLayout & getDataLayout() const =0
virtual InstructionCost getBranchMispredictPenalty()=0
virtual bool isProfitableLSRChainElement(Instruction *I)=0
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I)=0
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
virtual bool enableOrderedReductions()=0
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual bool shouldPrefetchAddressSpace(unsigned AS) const =0
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
virtual unsigned getMinVectorRegisterBitWidth() const =0
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const =0
virtual std::optional< unsigned > getVScaleForTuning() const =0
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)=0
virtual bool supportsEfficientVectorElementLoadStore()=0
virtual unsigned getRegUsageForType(Type *Ty)=0
virtual bool hasArmWideBranch(bool Thumb) const =0
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind)=0
virtual bool shouldBuildLookupTables()=0
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType)=0
virtual unsigned getGISelRematGlobalCost() const =0
virtual unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const =0
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize) const =0
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
virtual bool supportsTailCallFor(const CallBase *CB)=0
virtual std::optional< unsigned > getMaxVScale() const =0
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
virtual unsigned getMaxNumArgs() const =0
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
virtual bool enableWritePrefetching() const =0
virtual bool useColdCCForColdCall(Function &F)=0
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const =0
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual int getInlinerVectorBonusPercent() const =0
virtual unsigned getMaxPrefetchIterationsAhead() const =0
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual unsigned getCacheLineSize() const =0
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const =0
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)=0
virtual bool isProfitableToHoist(Instruction *I)=0
virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getFPOpCost(Type *Ty)=0
virtual unsigned getMinTripCountTailFoldingThreshold() const =0
virtual bool enableMaskedInterleavedAccessVectorization()=0
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
virtual bool isTypeLegal(Type *Ty)=0
virtual BranchProbability getPredictableBranchThreshold()=0
virtual bool enableScalableVectorization() const =0
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual const char * getRegisterClassName(unsigned ClassID) const =0
virtual unsigned getMaxInterleaveFactor(ElementCount VF)=0
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
virtual bool haveFastSqrt(Type *Ty)=0
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)=0
virtual std::optional< unsigned > getCacheSize(CacheLevel Level) const =0
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)=0
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
virtual std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
virtual bool supportsScalableVectors() const =0
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
virtual unsigned getNumberOfParts(Type *Tp)=0
virtual bool isLegalICmpImmediate(int64_t Imm)=0
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true)=0
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
virtual bool shouldBuildRelLookupTables()=0
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TargetCostKind CostKind)=0
virtual bool isLoweredToCall(const Function *F)=0
virtual bool isSourceOfDivergence(const Value *V)=0
virtual bool isLegalAddScalableImmediate(int64_t Imm)=0
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const =0
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual bool isFPVectorizationPotentiallyUnsafe()=0
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
virtual bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const =0
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual bool hasBranchDivergence(const Function *F=nullptr)=0
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual unsigned getInliningThresholdMultiplier() const =0
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)=0
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
virtual bool shouldDropLSRSolutionIfLessProfitable() const =0
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
virtual bool prefersVectorizedAddressing()=0
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const =0
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args, const Instruction *CxtI)=0
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, OperandValueInfo OpInfo, const Instruction *I)=0
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicElementSize) const =0
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
virtual bool isAlwaysUniform(const Value *V)=0
virtual std::optional< unsigned > getMinPageSize() const =0
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const =0
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual unsigned getFlatAddressSpace()=0
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)=0
virtual unsigned getPrefetchDistance() const =0
virtual bool shouldFoldTerminatingConditionAfterLSR() const =0
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I)=0
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const =0
virtual bool isNumRegsMajorCostOfLSR()=0
virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment)=0
virtual bool isSingleThreaded() const =0
virtual bool isLegalAddImmediate(int64_t Imm)=0
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I, int64_t ScalableOffset)=0
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
virtual bool isVScaleKnownToBeAPowerOfTwo() const =0
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
virtual bool enableInterleavedAccessVectorization()=0
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
virtual bool preferEpilogueVectorization() const =0
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast)=0
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const =0
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
bool isLegalToVectorizeLoad(LoadInst *LI) const
std::optional< unsigned > getVScaleForTuning() const
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Return false if a AS0 address cannot possibly alias a AS1 address.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool isLegalToVectorizeStore(StoreInst *SI) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize=std::nullopt) const
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
bool isAlwaysUniform(const Value *V) const
unsigned getAssumedAddrSpace(const Value *V) const
bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
bool shouldFoldTerminatingConditionAfterLSR() const
Return true if LSR should attempts to replace a use of an otherwise dead primary IV in the latch cond...
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instructions unique non-constant operands.
bool supportsTailCallFor(const CallBase *CB) const
If target supports tail call on CB.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
bool isProfitableLSRChainElement(Instruction *I) const
TypeSize getRegisterBitWidth(RegisterKind K) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
std::optional< unsigned > getMaxVScale() const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are t...
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
unsigned getAtomicMemIntrinsicMaxElementSize() const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting...
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
Query the target what the preferred style of tail folding is.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType=nullptr, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
\Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
unsigned getMaxPrefetchIterationsAhead() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
Return true if the target supports strided load.
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and ran.
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
OperandValueProperties
Additional properties of an operand's values.
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const PointersChainInfo &Info, Type *AccessTy, TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Estimate the cost of a chain of pointers (typically pointer operands of a chain of loads or stores wi...
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned adjustInliningThreshold(const CallBase *CB) const
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
unsigned getMinVectorRegisterBitWidth() const
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
It can be advantageous to detach complex constants from their uses to make their generation cheaper.
bool hasArmWideBranch(bool Thumb) const
const char * getRegisterClassName(unsigned ClassID) const
bool preferEpilogueVectorization() const
Return true if the loop vectorizer should consider vectorizing an otherwise scalar epilogue loop.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
PopcntSupportKind
Flags indicating the kind of support for population count.
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
unsigned getInliningThresholdMultiplier() const
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
unsigned getNumberOfRegisters(unsigned ClassID) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
std::optional< unsigned > getMinPageSize() const
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
std::optional< unsigned > getCacheSize(CacheLevel Level) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Returns the cost estimation for alternating opcode pattern that can be lowered to a single instructio...
TargetCostConstants
Underlying constants for 'cost' values in this interface.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
unsigned getMinTripCountTailFoldingThreshold() const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
unsigned getMaxInterleaveFactor(ElementCount VF) const
bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
unsigned getGISelRematGlobalCost() const
MemIndexedMode
The type of load/store indexing.
@ MIM_PostInc
Post-incrementing.
@ MIM_PostDec
Post-decrementing.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
bool supportsTailCalls() const
If the target supports tail calls.
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
unsigned getNumberOfParts(Type *Tp) const
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicElementSize=std::nullopt) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=std::nullopt, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr) const
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instruction.
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
bool shouldExpandReduction(const IntrinsicInst *II) const
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
Returns the maximum memset / memcpy size in bytes that still makes it profitable to inline the call.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
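A minimal sketch combining these ShuffleKind values with the getShuffleCost query listed above; the helper name reverseCheaperThanBroadcast is hypothetical.
#include "llvm/Analysis/TargetTransformInfo.h"
// Hypothetical helper: is reversing a whole vector cheaper on this target
// than broadcasting its first element? Defaults of getShuffleCost (no mask,
// reciprocal throughput) are used.
bool reverseCheaperThanBroadcast(const llvm::TargetTransformInfo &TTI,
                                 llvm::VectorType *VecTy) {
  llvm::InstructionCost Rev =
      TTI.getShuffleCost(llvm::TargetTransformInfo::SK_Reverse, VecTy);
  llvm::InstructionCost Bcast =
      TTI.getShuffleCost(llvm::TargetTransformInfo::SK_Broadcast, VecTy);
  return Rev < Bcast;
}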
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
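A minimal sketch of costing a control-flow instruction with the query above, relying on its default size-and-latency cost kind; branchCost is a hypothetical helper name.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
// Hypothetical helper: size-and-latency cost of an unconditional branch.
llvm::InstructionCost branchCost(const llvm::TargetTransformInfo &TTI) {
  return TTI.getCFInstrCost(llvm::Instruction::Br);
}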
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
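These hints are consumed by cast-cost queries such as getCastInstrCost, which is declared elsewhere in this header. A hedged sketch, assuming that declaration; maskedZExtCost is a hypothetical helper.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
// Hypothetical helper: cost a zext whose operand comes from a masked load,
// so the Masked context hint is passed to the target.
llvm::InstructionCost maskedZExtCost(const llvm::TargetTransformInfo &TTI,
                                     llvm::Type *DstTy, llvm::Type *SrcTy) {
  return TTI.getCastInstrCost(
      llvm::Instruction::ZExt, DstTy, SrcTy,
      llvm::TargetTransformInfo::CastContextHint::Masked,
      llvm::TargetTransformInfo::TCK_RecipThroughput);
}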
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
OperandValueKind
Additional information about an operand's possible values.
CacheLevel
The possible cache levels.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
AtomicOrdering
Atomic ordering for LLVM's memory model.
TargetTransformInfo TTI
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
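A hedged sketch for the legacy pass manager: register a baseline TTI (a default-constructed TargetIRAnalysis) so later IR passes can query it; addBaselineTTI is a hypothetical helper.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/LegacyPassManager.h"
// Hypothetical setup: add the wrapper pass so getAnalysis<TargetTransformInfoWrapperPass>()
// works in subsequent legacy passes.
void addBaselineTTI(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createTargetTransformInfoWrapperPass(llvm::TargetIRAnalysis()));
}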
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements this with a splat/stepvector/cmp.
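A hedged sketch of consuming these styles, assuming the getPreferredTailFoldingStyle hook declared elsewhere in this header; prefersEVLTailFolding is a hypothetical helper.
#include "llvm/Analysis/TargetTransformInfo.h"
// Hypothetical check: does the target prefer EVL-based (vector-predication)
// tail folding for vectorized loops?
bool prefersEVLTailFolding(const llvm::TargetTransformInfo &TTI) {
  return TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true) ==
         llvm::TailFoldingStyle::DataWithEVL;
}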
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:92
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Definition: Analysis.h:28
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
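A minimal sketch of the intended use of HardwareLoopInfo: analyze the loop first, then test whether it is a hardware-loop candidate. The helper name canFormHardwareLoop is hypothetical.
#include "llvm/Analysis/TargetTransformInfo.h"
// Hypothetical legality check for turning L into a target hardware loop.
bool canFormHardwareLoop(llvm::Loop *L, llvm::ScalarEvolution &SE,
                         llvm::LoopInfo &LI, llvm::DominatorTree &DT) {
  llvm::HardwareLoopInfo HWLoopInfo(L);
  if (!HWLoopInfo.canAnalyze(LI))
    return false;
  return HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT,
                                            /*ForceNestedLoop=*/false,
                                            /*ForceHardwareLoopPHI=*/false);
}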
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
TargetLibraryInfo * TLI
LoopVectorizationLegality * LVL
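A minimal sketch of wiring TailFoldingInfo into the preferPredicateOverEpilogue query documented above; wantsTailFolding is a hypothetical helper.
#include "llvm/Analysis/TargetTransformInfo.h"
// Hypothetical glue: package the vectorizer's analyses and ask the target
// whether a predicated (tail-folded) vector loop beats a scalar epilogue.
bool wantsTailFolding(const llvm::TargetTransformInfo &TTI,
                      llvm::TargetLibraryInfo *TLI,
                      llvm::LoopVectorizationLegality *LVL,
                      llvm::InterleavedAccessInfo *IAI) {
  llvm::TailFoldingInfo TFI(TLI, LVL, IAI);
  return TTI.preferPredicateOverEpilogue(&TFI);
}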
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelProfiledIterations
Allow peeling based on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before the loop body).
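A hedged sketch of how driver code might seed these fields and then let the target refine them through the getPeelingPreferences hook listed earlier; the defaults and the helper name collectPeelingPreferences are assumptions for illustration.
#include "llvm/Analysis/TargetTransformInfo.h"
// Hypothetical driver: seed generic defaults, then allow the target to
// override them for loop L.
llvm::TargetTransformInfo::PeelingPreferences
collectPeelingPreferences(const llvm::TargetTransformInfo &TTI, llvm::Loop *L,
                          llvm::ScalarEvolution &SE) {
  llvm::TargetTransformInfo::PeelingPreferences PP;
  PP.PeelCount = 0;                  // no forced peeling by default
  PP.AllowPeeling = true;
  PP.AllowLoopNestsPeeling = false;
  PP.PeelProfiledIterations = true;
  TTI.getPeelingPreferences(L, SE, PP);
  return PP;
}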
Describe known properties for a set of pointers.
unsigned IsKnownStride
True if the distance between any two neighbouring pointers is a known value.
unsigned IsUnitStride
These properties are only valid if SameBaseAddress is set.
unsigned IsSameBaseAddress
All the GEPs in a set have same base address.
Flags describing the kind of vector reduction.
bool IsSigned
Whether the operation is a signed int reduction.
bool IsMaxOp
If the op is a min/max kind, true if it's a max operation.
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loop body).
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain percentage to allow more aggressive complete unrolling.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll profitability.
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for runtime unrolling.
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
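A hedged sketch of consuming the fields above, assuming the getUnrollingPreferences hook declared elsewhere in this header; the seeded defaults and the helper name targetAllowsRuntimeUnroll are assumptions, and the base implementation leaves untouched fields unchanged.
#include "llvm/Analysis/TargetTransformInfo.h"
// Hypothetical driver: seed a few generic defaults, let the target override
// them, then check whether runtime unrolling is allowed for loop L.
bool targetAllowsRuntimeUnroll(const llvm::TargetTransformInfo &TTI,
                               llvm::Loop *L, llvm::ScalarEvolution &SE,
                               llvm::OptimizationRemarkEmitter *ORE) {
  llvm::TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150;       // generic cost budget; targets may raise or lower it
  UP.Partial = false;
  UP.Runtime = false;
  UP.AllowRemainder = true;
  UP.Count = 0;             // let the cost model pick the unroll factor
  TTI.getUnrollingPreferences(L, SE, UP, ORE);
  return UP.Runtime;
}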
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)