LLVM 19.0.0git
TargetTransformInfo.h
1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24 #include "llvm/ADT/APInt.h"
25 #include "llvm/IR/FMF.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/AtomicOrdering.h"
30 #include "llvm/Support/BranchProbability.h"
31 #include "llvm/Support/InstructionCost.h"
32 #include <functional>
33 #include <optional>
34 #include <utility>
35
36 namespace llvm {
37
38 namespace Intrinsic {
39 typedef unsigned ID;
40 }
41
42 class AllocaInst;
43 class AssumptionCache;
44 class BlockFrequencyInfo;
45 class DominatorTree;
46 class BranchInst;
47 class CallBase;
48 class Function;
49 class GlobalValue;
50 class InstCombiner;
51 class OptimizationRemarkEmitter;
52 class InterleavedAccessInfo;
53 class IntrinsicInst;
54 class LoadInst;
55 class Loop;
56 class LoopInfo;
57 class LoopVectorizationLegality;
58 class ProfileSummaryInfo;
59 class RecurrenceDescriptor;
60 class SCEV;
61 class ScalarEvolution;
62 class StoreInst;
63 class SwitchInst;
64 class TargetLibraryInfo;
65 class Type;
66 class User;
67 class Value;
68 class VPIntrinsic;
69 struct KnownBits;
70
71 /// Information about a load/store intrinsic defined by the target.
72 struct MemIntrinsicInfo {
73 /// This is the pointer that the intrinsic is loading from or storing to.
74 /// If this is non-null, then analysis/optimization passes can assume that
75 /// this intrinsic is functionally equivalent to a load/store from this
76 /// pointer.
77 Value *PtrVal = nullptr;
78
79 // Ordering for atomic operations.
80 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
81
82 // Same Id is set by the target for corresponding load/store intrinsics.
83 unsigned short MatchingId = 0;
84
85 bool ReadMem = false;
86 bool WriteMem = false;
87 bool IsVolatile = false;
88
89 bool isUnordered() const {
90 return (Ordering == AtomicOrdering::NotAtomic ||
91 Ordering == AtomicOrdering::Unordered) &&
92 !IsVolatile;
93 }
94};
95
96 /// Attributes of a target dependent hardware loop.
97 struct HardwareLoopInfo {
98 HardwareLoopInfo() = delete;
99 HardwareLoopInfo(Loop *L);
100 Loop *L = nullptr;
101 BasicBlock *ExitBlock = nullptr;
102 BranchInst *ExitBranch = nullptr;
103 const SCEV *ExitCount = nullptr;
104 IntegerType *CountType = nullptr;
105 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
106 // value in every iteration.
107 bool IsNestingLegal = false; // Can a hardware loop be a parent to
108 // another hardware loop?
109 bool CounterInReg = false; // Should loop counter be updated in
110 // the loop via a phi?
111 bool PerformEntryTest = false; // Generate the intrinsic which also performs
112 // icmp ne zero on the loop counter value and
113 // produces an i1 to guard the loop entry.
114 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
115 DominatorTree &DT, bool ForceNestedLoop = false,
116 bool ForceHardwareLoopPHI = false);
117 bool canAnalyze(LoopInfo &LI);
118};
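// A minimal usage sketch, assuming a Loop *L plus ScalarEvolution, LoopInfo,
// DominatorTree, AssumptionCache, TargetLibraryInfo and TargetTransformInfo
// objects (SE, LI, DT, AC, LibInfo, TTI) are available in the caller; the
// hardware-loop hooks referenced here are declared further down in this file:
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (HWLoopInfo.canAnalyze(LI) &&
//       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
//     ; // safe to emit the hardware-loop intrinsics using HWLoopInfo fields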
119
120 class IntrinsicCostAttributes {
121 const IntrinsicInst *II = nullptr;
122 Type *RetTy = nullptr;
123 Intrinsic::ID IID;
124 SmallVector<Type *, 4> ParamTys;
125 SmallVector<const Value *, 4> Arguments;
126 FastMathFlags FMF;
127 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
128 // arguments and the return value will be computed based on types.
129 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
130
131 public:
132 IntrinsicCostAttributes(
133 Intrinsic::ID Id, const CallBase &CI,
134 InstructionCost ScalarizationCost = InstructionCost::getInvalid(),
135 bool TypeBasedOnly = false);
136
137 IntrinsicCostAttributes(
138 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
139 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
140 InstructionCost ScalarizationCost = InstructionCost::getInvalid());
141
142 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
143 ArrayRef<const Value *> Args);
144
145 IntrinsicCostAttributes(
146 Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
147 ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
148 const IntrinsicInst *I = nullptr,
149 InstructionCost ScalarizationCost = InstructionCost::getInvalid());
150
151 Intrinsic::ID getID() const { return IID; }
152 const IntrinsicInst *getInst() const { return II; }
153 Type *getReturnType() const { return RetTy; }
154 FastMathFlags getFlags() const { return FMF; }
155 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
156 const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
157 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
158
159 bool isTypeBasedOnly() const {
160 return Arguments.empty();
161 }
162
163 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
164};
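// A minimal sketch of a type-based cost query, assuming a TargetTransformInfo
// reference TTI, a vector type VecTy and a cost kind CostKind in the caller;
// getIntrinsicInstrCost is declared further down in this interface:
//
//   IntrinsicCostAttributes Attrs(Intrinsic::fshl, VecTy,
//                                 {VecTy, VecTy, VecTy});
//   InstructionCost Cost = TTI.getIntrinsicInstrCost(Attrs, CostKind);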
165
166 enum class TailFoldingStyle {
167 /// Don't use tail folding
168 None,
169 /// Use predicate only to mask operations on data in the loop.
170 /// When the VL is not known to be a power-of-2, this method requires a
171 /// runtime overflow check for the i + VL in the loop because it compares the
172 /// scalar induction variable against the tripcount rounded up by VL which may
173 /// overflow. When the VL is a power-of-2, both the increment and uprounded
174 /// tripcount will overflow to 0, which does not require a runtime check
175 /// since the loop is exited when the loop induction variable equals the
176 /// uprounded trip-count, which are both 0.
177 Data,
178 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
179 /// calculate the mask and instead implements this with a
180 /// splat/stepvector/cmp.
181 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
182 /// active.lane.mask intrinsic when it is not natively supported?
183 DataWithoutLaneMask,
184 /// Use predicate to control both data and control flow.
185 /// This method always requires a runtime overflow check for the i + VL
186 /// increment inside the loop, because it uses the result directly in the
187 /// active.lane.mask to calculate the mask for the next iteration. If the
188 /// increment overflows, the mask is no longer correct.
189 DataAndControlFlow,
190 /// Use predicate to control both data and control flow, but modify
191 /// the trip count so that a runtime overflow check can be avoided
192 /// and such that the scalar epilogue loop can always be removed.
193 DataAndControlFlowWithoutRuntimeCheck,
194 /// Use predicated EVL instructions for tail-folding.
195 /// Indicates that VP intrinsics should be used.
196 DataWithEVL,
197 };
198
199 struct TailFoldingInfo {
200 TargetLibraryInfo *TLI;
201 LoopVectorizationLegality *LVL;
202 InterleavedAccessInfo *IAI;
203 TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
204 InterleavedAccessInfo *IAI)
205 : TLI(TLI), LVL(LVL), IAI(IAI) {}
206};
207
208 class TargetTransformInfo;
209 typedef TargetTransformInfo TTI;
210
211/// This pass provides access to the codegen interfaces that are needed
212/// for IR-level transformations.
213 class TargetTransformInfo {
214 public:
215 /// Construct a TTI object using a type implementing the \c Concept
216 /// API below.
217 ///
218 /// This is used by targets to construct a TTI wrapping their target-specific
219 /// implementation that encodes appropriate costs for their target.
220 template <typename T> TargetTransformInfo(T Impl);
221
222 /// Construct a baseline TTI object using a minimal implementation of
223 /// the \c Concept API below.
224 ///
225 /// The TTI implementation will reflect the information in the DataLayout
226 /// provided if non-null.
227 explicit TargetTransformInfo(const DataLayout &DL);
228
229 // Provide move semantics.
230 TargetTransformInfo(TargetTransformInfo &&Arg);
231 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
232
233 // We need to define the destructor out-of-line to define our sub-classes
234 // out-of-line.
235 ~TargetTransformInfo();
236
237 /// Handle the invalidation of this information.
238 ///
239 /// When used as a result of \c TargetIRAnalysis this method will be called
240 /// when the function this was computed for changes. When it returns false,
241 /// the information is preserved across those changes.
242 bool invalidate(Function &, const PreservedAnalyses &,
243 FunctionAnalysisManager::Invalidator &) {
244 // FIXME: We should probably in some way ensure that the subtarget
245 // information for a function hasn't changed.
246 return false;
247 }
248
249 /// \name Generic Target Information
250 /// @{
251
252 /// The kind of cost model.
253 ///
254 /// There are several different cost models that can be customized by the
255 /// target. The normalization of each cost model may be target specific.
256 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
257 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
258 enum TargetCostKind {
259 TCK_RecipThroughput, ///< Reciprocal throughput.
260 TCK_Latency, ///< The latency of instruction.
261 TCK_CodeSize, ///< Instruction code size.
262 TCK_SizeAndLatency ///< The weighted sum of size and latency.
263 };
264
265 /// Underlying constants for 'cost' values in this interface.
266 ///
267 /// Many APIs in this interface return a cost. This enum defines the
268 /// fundamental values that should be used to interpret (and produce) those
269 /// costs. The costs are returned as an int rather than a member of this
270 /// enumeration because it is expected that the cost of one IR instruction
271 /// may have a multiplicative factor to it or otherwise won't fit directly
272 /// into the enum. Moreover, it is common to sum or average costs which works
273 /// better as simple integral values. Thus this enum only provides constants.
274 /// Also note that the returned costs are signed integers to make it natural
275 /// to add, subtract, and test with zero (a common boundary condition). It is
276 /// not expected that 2^32 is a realistic cost to be modeling at any point.
277 ///
278 /// Note that these costs should usually reflect the intersection of code-size
279 /// cost and execution cost. A free instruction is typically one that folds
280 /// into another instruction. For example, reg-to-reg moves can often be
281 /// skipped by renaming the registers in the CPU, but they still are encoded
282 /// and thus wouldn't be considered 'free' here.
283 enum TargetCostConstants {
284 TCC_Free = 0, ///< Expected to fold away in lowering.
285 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
286 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
287 };
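// A minimal sketch of how these constants are typically combined; the names
// BodyCost and Budget are illustrative only:
//
//   InstructionCost BodyCost = TargetTransformInfo::TCC_Free;
//   BodyCost += TargetTransformInfo::TCC_Basic;         // e.g. an 'add'
//   BodyCost += 2 * TargetTransformInfo::TCC_Expensive; // e.g. two 'div's
//   if (BodyCost > Budget)
//     ; // give up on the transformation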
288
289 /// Estimate the cost of a GEP operation when lowered.
290 ///
291 /// \p PointeeType is the source element type of the GEP.
292 /// \p Ptr is the base pointer operand.
293 /// \p Operands is the list of indices following the base pointer.
294 ///
295 /// \p AccessType is a hint as to what type of memory might be accessed by
296 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
297 /// folded into the addressing mode of a load/store. If AccessType is null,
298 /// then the resulting target type based off of PointeeType will be used as an
299 /// approximation.
300 InstructionCost
301 getGEPCost(Type *PointeeType, const Value *Ptr,
302 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
303 TargetCostKind CostKind = TCK_SizeAndLatency) const;
304
305 /// Describe known properties for a set of pointers.
306 struct PointersChainInfo {
307 /// All the GEPs in a set have the same base address.
308 unsigned IsSameBaseAddress : 1;
309 /// These properties are only valid if IsSameBaseAddress is set.
310 /// True if all pointers are separated by a unit stride.
311 unsigned IsUnitStride : 1;
312 /// True if the distance between any two neighbouring pointers is a known value.
313 unsigned IsKnownStride : 1;
314 unsigned Reserved : 29;
315
316 bool isSameBase() const { return IsSameBaseAddress; }
317 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
318 bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
319
320 static PointersChainInfo getUnitStride() {
321 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
322 /*IsKnownStride=*/1, 0};
323 }
324 static PointersChainInfo getKnownStride() {
325 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
326 /*IsKnownStride=*/1, 0};
327 }
328 static PointersChainInfo getUnknownStride() {
329 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
330 /*IsKnownStride=*/0, 0};
331 }
332 };
333 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
334
335 /// Estimate the cost of a chain of pointers (typically pointer operands of a
336 /// chain of loads or stores within the same block) when lowered.
337 /// \p AccessTy is the type of the loads/stores that will ultimately use the
338 /// \p Ptrs.
339 InstructionCost
340 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
341 const PointersChainInfo &Info, Type *AccessTy,
342 TargetCostKind CostKind = TTI::TCK_RecipThroughput
343
344 ) const;
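// A minimal usage sketch, assuming TTI is a TargetTransformInfo reference,
// Ptrs holds the pointer operands of a chain of consecutive loads with a
// common base pointer Base, and AccessTy is the loaded element type:
//
//   InstructionCost ChainCost = TTI.getPointersChainCost(
//       Ptrs, Base, TargetTransformInfo::PointersChainInfo::getUnitStride(),
//       AccessTy, TargetTransformInfo::TCK_RecipThroughput);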
345
346 /// \returns A value by which our inlining threshold should be multiplied.
347 /// This is primarily used to bump up the inlining threshold wholesale on
348 /// targets where calls are unusually expensive.
349 ///
350 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
351 /// individual classes of instructions would be better.
352 unsigned getInliningThresholdMultiplier() const;
353
356
357 /// \returns A value to be added to the inlining threshold.
358 unsigned adjustInliningThreshold(const CallBase *CB) const;
359
360 /// \returns The cost of having an Alloca in the caller if not inlined, to be
361 /// added to the threshold
362 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
363
364 /// \returns Vector bonus in percent.
365 ///
366 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
367 /// and apply this bonus based on the percentage of vector instructions. A
368 /// bonus is applied if the vector instructions exceed 50% and half that
369 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
370 /// arbitrary and evolved over time by accident as much as because they are
371 /// principled bonuses.
372 /// FIXME: It would be nice to base the bonus values on something more
373 /// scientific. A target may have no bonus on vector instructions.
374 int getInlinerVectorBonusPercent() const;
375
376 /// \return the expected cost of a memcpy, which could e.g. depend on the
377 /// source/destination type and alignment and the number of bytes copied.
378 InstructionCost getMemcpyCost(const Instruction *I) const;
379
380 /// Returns the maximum memset / memcpy size in bytes that still makes it
381 /// profitable to inline the call.
382 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const;
383
384 /// \return The estimated number of case clusters when lowering \p 'SI'.
385 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
386 /// table.
387 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
388 unsigned &JTSize,
389 ProfileSummaryInfo *PSI,
390 BlockFrequencyInfo *BFI) const;
391
392 /// Estimate the cost of a given IR user when lowered.
393 ///
394 /// This can estimate the cost of either a ConstantExpr or Instruction when
395 /// lowered.
396 ///
397 /// \p Operands is a list of operands which can be a result of transformations
398 /// of the current operands. The number of operands on the list must equal
399 /// the number of the current operands the IR user has. Their order on the
400 /// list must be the same as the order of the current operands the IR user
401 /// has.
402 ///
403 /// The returned cost is defined in terms of \c TargetCostConstants, see its
404 /// comments for a detailed explanation of the cost values.
405 InstructionCost getInstructionCost(const User *U,
406 ArrayRef<const Value *> Operands,
407 TargetCostKind CostKind) const;
408
409 /// This is a helper function which calls the three-argument
410 /// getInstructionCost with \p Operands which are the current operands U has.
411 InstructionCost getInstructionCost(const User *U,
412 TargetCostKind CostKind) const {
413 SmallVector<const Value *, 4> Operands(U->operand_values());
414 return getInstructionCost(U, Operands, CostKind);
415 }
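// A minimal usage sketch, assuming a TargetTransformInfo reference TTI and an
// instruction I being examined by a transformation:
//
//   InstructionCost Cost =
//       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
//   bool IsFree = Cost == TargetTransformInfo::TCC_Free;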
416
417 /// If a branch or a select condition is skewed in one direction by more than
418 /// this factor, it is very likely to be predicted correctly.
419 BranchProbability getPredictableBranchThreshold() const;
420
421 /// Return true if branch divergence exists.
422 ///
423 /// Branch divergence has a significantly negative impact on GPU performance
424 /// when threads in the same wavefront take different paths due to conditional
425 /// branches.
426 ///
427 /// If \p F is passed, provides a context function. If \p F is known to only
428 /// execute in a single threaded environment, the target may choose to skip
429 /// uniformity analysis and assume all values are uniform.
430 bool hasBranchDivergence(const Function *F = nullptr) const;
431
432 /// Returns whether V is a source of divergence.
433 ///
434 /// This function provides the target-dependent information for
435 /// the target-independent UniformityAnalysis.
436 bool isSourceOfDivergence(const Value *V) const;
437
438 // Returns true for the target specific
439 // set of operations which produce uniform result
440 // even taking non-uniform arguments
441 bool isAlwaysUniform(const Value *V) const;
442
443 /// Query the target whether the specified address space cast from FromAS to
444 /// ToAS is valid.
445 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
446
447 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
448 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
449
450 /// Returns the address space ID for a target's 'flat' address space. Note
451 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
452 /// refers to as the generic address space. The flat address space is a
453 /// generic address space that can be used to access multiple segments of memory
454 /// with different address spaces. Access of a memory location through a
455 /// pointer with this address space is expected to be legal but slower
456 /// compared to the same memory location accessed through a pointer with a
457 /// different address space.
458 ///
459 /// This is for targets with different pointer representations which can
460 /// be converted with the addrspacecast instruction. If a pointer is converted
461 /// to this address space, optimizations should attempt to replace the access
462 /// with the source address space.
463 ///
464 /// \returns ~0u if the target does not have such a flat address space to
465 /// optimize away.
466 unsigned getFlatAddressSpace() const;
467
468 /// Return any intrinsic address operand indexes which may be rewritten if
469 /// they use a flat address space pointer.
470 ///
471 /// \returns true if the intrinsic was handled.
472 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
473 Intrinsic::ID IID) const;
474
475 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
476
477 /// Return true if globals in this address space can have initializers other
478 /// than `undef`.
479 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
480
481 unsigned getAssumedAddrSpace(const Value *V) const;
482
483 bool isSingleThreaded() const;
484
485 std::pair<const Value *, unsigned>
486 getPredicatedAddrSpace(const Value *V) const;
487
488 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
489 /// NewV, which has a different address space. This should happen for every
490 /// operand index that collectFlatAddressOperands returned for the intrinsic.
491 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
492 /// new value (which may be the original \p II with modified operands).
493 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
494 Value *NewV) const;
495
496 /// Test whether calls to a function lower to actual program function
497 /// calls.
498 ///
499 /// The idea is to test whether the program is likely to require a 'call'
500 /// instruction or equivalent in order to call the given function.
501 ///
502 /// FIXME: It's not clear that this is a good or useful query API. Clients
503 /// should probably move to simpler cost metrics using the above.
504 /// Alternatively, we could split the cost interface into distinct code-size
505 /// and execution-speed costs. This would allow modelling the core of this
506 /// query more accurately as a call is a single small instruction, but
507 /// incurs significant execution cost.
508 bool isLoweredToCall(const Function *F) const;
509
510 struct LSRCost {
511 /// TODO: Some of these could be merged. Also, a lexical ordering
512 /// isn't always optimal.
513 unsigned Insns;
514 unsigned NumRegs;
515 unsigned AddRecCost;
516 unsigned NumIVMuls;
517 unsigned NumBaseAdds;
518 unsigned ImmCost;
519 unsigned SetupCost;
520 unsigned ScaleCost;
521 };
522
523 /// Parameters that control the generic loop unrolling transformation.
524 struct UnrollingPreferences {
525 /// The cost threshold for the unrolled loop. Should be relative to the
526 /// getInstructionCost values returned by this API, and the expectation is
527 /// that the unrolled loop's instructions when run through that interface
528 /// should not exceed this cost. However, this is only an estimate. Also,
529 /// specific loops may be unrolled even with a cost above this threshold if
530 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
531 /// restriction.
532 unsigned Threshold;
533 /// If complete unrolling will reduce the cost of the loop, we will boost
534 /// the Threshold by a certain percent to allow more aggressive complete
535 /// unrolling. This value provides the maximum boost percentage that we
536 /// can apply to Threshold (The value should be no less than 100).
537 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
538 /// MaxPercentThresholdBoost / 100)
539 /// E.g. if complete unrolling reduces the loop execution time by 50%
540 /// then we boost the threshold by the factor of 2x. If unrolling is not
541 /// expected to reduce the running time, then we do not increase the
542 /// threshold.
543 unsigned MaxPercentThresholdBoost;
544 /// The cost threshold for the unrolled loop when optimizing for size (set
545 /// to UINT_MAX to disable).
546 unsigned OptSizeThreshold;
547 /// The cost threshold for the unrolled loop, like Threshold, but used
548 /// for partial/runtime unrolling (set to UINT_MAX to disable).
549 unsigned PartialThreshold;
550 /// The cost threshold for the unrolled loop when optimizing for size, like
551 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
552 /// UINT_MAX to disable).
553 unsigned PartialOptSizeThreshold;
554 /// A forced unrolling factor (the number of concatenated bodies of the
555 /// original loop in the unrolled loop body). When set to 0, the unrolling
556 /// transformation will select an unrolling factor based on the current cost
557 /// threshold and other factors.
558 unsigned Count;
559 /// Default unroll count for loops with run-time trip count.
560 unsigned DefaultUnrollRuntimeCount;
561 // Set the maximum unrolling factor. The unrolling factor may be selected
562 // using the appropriate cost threshold, but may not exceed this number
563 // (set to UINT_MAX to disable). This does not apply in cases where the
564 // loop is being fully unrolled.
565 unsigned MaxCount;
566 /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
567 /// to be overridden by a target gives more flexibility in certain cases.
568 /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
569 unsigned MaxUpperBound;
570 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
571 /// applies even if full unrolling is selected. This allows a target to fall
572 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
573 unsigned FullUnrollMaxCount;
574 // Represents number of instructions optimized when "back edge"
575 // becomes "fall through" in unrolled loop.
576 // For now we count a conditional branch on a backedge and a comparison
577 // feeding it.
578 unsigned BEInsns;
579 /// Allow partial unrolling (unrolling of loops to expand the size of the
580 /// loop body, not only to eliminate small constant-trip-count loops).
581 bool Partial;
582 /// Allow runtime unrolling (unrolling of loops to expand the size of the
583 /// loop body even when the number of loop iterations is not known at
584 /// compile time).
585 bool Runtime;
586 /// Allow generation of a loop remainder (extra iterations after unroll).
587 bool AllowRemainder;
588 /// Allow emitting expensive instructions (such as divisions) when computing
589 /// the trip count of a loop for runtime unrolling.
590 bool AllowExpensiveTripCount;
591 /// Apply loop unroll on any kind of loop
592 /// (mainly to loops that fail runtime unrolling).
593 bool Force;
594 /// Allow using trip count upper bound to unroll loops.
595 bool UpperBound;
596 /// Allow unrolling of all the iterations of the runtime loop remainder.
597 bool UnrollRemainder;
598 /// Allow unroll and jam. Used to enable unroll and jam for the target.
599 bool UnrollAndJam;
600 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
601 /// value above is used during unroll and jam for the outer loop size.
602 /// This value is used in the same manner to limit the size of the inner
603 /// loop.
604 unsigned UnrollAndJamInnerLoopThreshold;
605 /// Don't allow loop unrolling to simulate more than this number of
606 /// iterations when checking full unroll profitability.
607 unsigned MaxIterationsCountToAnalyze;
608 /// Don't disable runtime unroll for the loops which were vectorized.
609 bool UnrollVectorizedLoop = false;
610 };
611
612 /// Get target-customized preferences for the generic loop unrolling
613 /// transformation. The caller will initialize UP with the current
614 /// target-independent defaults.
615 void getUnrollingPreferences(Loop *L, ScalarEvolution &,
616 UnrollingPreferences &UP,
617 OptimizationRemarkEmitter *ORE) const;
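// A minimal caller-side sketch, assuming Loop *L, ScalarEvolution &SE, an
// OptimizationRemarkEmitter *ORE and a TargetTransformInfo reference TTI; the
// caller seeds UP with the target-independent defaults before the call:
//
//   TargetTransformInfo::UnrollingPreferences UP;
//   // ... initialize UP with the generic defaults ...
//   TTI.getUnrollingPreferences(L, SE, UP, ORE);
//   if (UP.Count != 0)
//     ; // the target forced a specific unroll factor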
618
619 /// Query the target whether it would be profitable to convert the given loop
620 /// into a hardware loop.
621 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
622 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
623 HardwareLoopInfo &HWLoopInfo) const;
624
625 /// Query the target whether it would be preferred to create a predicated
626 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
627 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
628
629 /// Query the target what the preferred style of tail folding is.
630 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
631 /// may (or will never) overflow for the suggested VF/UF in the given loop.
632 /// Targets can use this information to select a more optimal tail folding
633 /// style. The value conservatively defaults to true, such that no assumptions
634 /// are made on overflow.
635 TailFoldingStyle
636 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
637
638 // Parameters that control the loop peeling transformation
639 struct PeelingPreferences {
640 /// A forced peeling factor (the number of bodies of the original loop
641 /// that should be peeled off before the loop body). When set to 0, a
642 /// peeling factor is selected based on profile information and other factors.
643 unsigned PeelCount;
644 /// Allow peeling off loop iterations.
645 bool AllowPeeling;
646 /// Allow peeling off loop iterations for loop nests.
647 bool AllowLoopNestsPeeling;
648 /// Allow peeling based on profile. Used to enable peeling off all
649 /// iterations based on the provided profile.
650 /// If the value is true the peeling cost model can decide to peel only
651 /// some iterations and in this case it will set this to false.
652 bool PeelProfiledIterations;
653 };
654
655 /// Get target-customized preferences for the generic loop peeling
656 /// transformation. The caller will initialize \p PP with the current
657 /// target-independent defaults with information from \p L and \p SE.
658 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
659 PeelingPreferences &PP) const;
660
661 /// Targets can implement their own combinations for target-specific
662 /// intrinsics. This function will be called from the InstCombine pass every
663 /// time a target-specific intrinsic is encountered.
664 ///
665 /// \returns std::nullopt to not do anything target specific or a value that
666 /// will be returned from the InstCombiner. It is possible to return null and
667 /// stop further processing of the intrinsic by returning nullptr.
668 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
669 IntrinsicInst & II) const;
670 /// Can be used to implement target-specific instruction combining.
671 /// \see instCombineIntrinsic
672 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
673 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
674 KnownBits & Known, bool &KnownBitsComputed) const;
675 /// Can be used to implement target-specific instruction combining.
676 /// \see instCombineIntrinsic
677 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
678 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
679 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
680 std::function<void(Instruction *, unsigned, APInt, APInt &)>
681 SimplifyAndSetOp) const;
682 /// @}
683
684 /// \name Scalar Target Information
685 /// @{
686
687 /// Flags indicating the kind of support for population count.
688 ///
689 /// Compared to the SW implementation, HW support is supposed to
690 /// significantly boost the performance when the population is dense, and it
691 /// may or may not degrade performance if the population is sparse. A HW
692 /// support is considered as "Fast" if it can outperform, or is on a par
693 /// with, SW implementation when the population is sparse; otherwise, it is
694 /// considered as "Slow".
695 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
696
697 /// Return true if the specified immediate is legal add immediate, that
698 /// is the target has add instructions which can add a register with the
699 /// immediate without having to materialize the immediate into a register.
700 bool isLegalAddImmediate(int64_t Imm) const;
701
702 /// Return true if adding the specified scalable immediate is legal, that is
703 /// the target has add instructions which can add a register with the
704 /// immediate (multiplied by vscale) without having to materialize the
705 /// immediate into a register.
706 bool isLegalAddScalableImmediate(int64_t Imm) const;
707
708 /// Return true if the specified immediate is legal icmp immediate,
709 /// that is the target has icmp instructions which can compare a register
710 /// against the immediate without having to materialize the immediate into a
711 /// register.
712 bool isLegalICmpImmediate(int64_t Imm) const;
713
714 /// Return true if the addressing mode represented by AM is legal for
715 /// this target, for a load/store of the specified type.
716 /// The type may be VoidTy, in which case only return true if the addressing
717 /// mode is legal for a load/store of any legal type.
718 /// If target returns true in LSRWithInstrQueries(), I may be valid.
719 /// \param ScalableOffset represents a quantity of bytes multiplied by vscale,
720 /// an invariant value known only at runtime. Most targets should not accept
721 /// a scalable offset.
722 ///
723 /// TODO: Handle pre/postinc as well.
724 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
725 bool HasBaseReg, int64_t Scale,
726 unsigned AddrSpace = 0, Instruction *I = nullptr,
727 int64_t ScalableOffset = 0) const;
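// A minimal usage sketch: ask whether "BaseReg + 4 * IndexReg + 16" is a legal
// address for an i32 load in the default address space, assuming a
// TargetTransformInfo reference TTI and an LLVMContext Ctx:
//
//   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
//                                          /*BaseGV=*/nullptr,
//                                          /*BaseOffset=*/16,
//                                          /*HasBaseReg=*/true,
//                                          /*Scale=*/4);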
728
729 /// Return true if LSR cost of C1 is lower than C2.
730 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
731 const TargetTransformInfo::LSRCost &C2) const;
732
733 /// Return true if LSR major cost is number of registers. Targets which
734 /// implement their own isLSRCostLess and unset number of registers as major
735 /// cost should return false, otherwise return true.
736 bool isNumRegsMajorCostOfLSR() const;
737
738 /// Return true if LSR should attempt to replace a use of an otherwise dead
739 /// primary IV in the latch condition with another IV available in the loop.
740 /// When successful, makes the primary IV dead.
741 bool shouldFoldTerminatingConditionAfterLSR() const;
742
743 /// Return true if LSR should drop a found solution if it's calculated to be
744 /// less profitable than the baseline.
745 bool shouldDropLSRSolutionIfLessProfitable() const;
746
747 /// \returns true if LSR should not optimize a chain that includes \p I.
748 bool isProfitableLSRChainElement(Instruction *I) const;
749
750 /// Return true if the target can fuse a compare and branch.
751 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
752 /// calculation for the instructions in a loop.
753 bool canMacroFuseCmp() const;
754
755 /// Return true if the target can save a compare for loop count, for example
756 /// hardware loop saves a compare.
757 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
758 DominatorTree *DT, AssumptionCache *AC,
759 TargetLibraryInfo *LibInfo) const;
760
761 enum AddressingModeKind {
762 AMK_PreIndexed,
763 AMK_PostIndexed,
764 AMK_None
765 };
766
767 /// Return the preferred addressing mode LSR should make efforts to generate.
768 AddressingModeKind getPreferredAddressingMode(const Loop *L,
769 ScalarEvolution *SE) const;
770
771 /// Return true if the target supports masked store.
772 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
773 /// Return true if the target supports masked load.
774 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
775
776 /// Return true if the target supports nontemporal store.
777 bool isLegalNTStore(Type *DataType, Align Alignment) const;
778 /// Return true if the target supports nontemporal load.
779 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
780
781 /// \returns true if the target supports broadcasting a load to a vector of
782 /// type <NumElements x ElementTy>.
783 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
784
785 /// Return true if the target supports masked scatter.
786 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
787 /// Return true if the target supports masked gather.
788 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
789 /// Return true if the target forces scalarizing of llvm.masked.gather
790 /// intrinsics.
791 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
792 /// Return true if the target forces scalarizing of llvm.masked.scatter
793 /// intrinsics.
794 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
795
796 /// Return true if the target supports masked compress store.
797 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const;
798 /// Return true if the target supports masked expand load.
799 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
800
801 /// Return true if the target supports strided load.
802 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
803
804 // Return true if the target supports masked vector histograms.
805 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const;
806
807 /// Return true if this is an alternating opcode pattern that can be lowered
808 /// to a single instruction on the target. In X86 this is for the addsub
809 /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
810 /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
811 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
812 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
813 /// \p VecTy is the vector type of the instruction to be generated.
814 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
815 const SmallBitVector &OpcodeMask) const;
816
817 /// Return true if we should be enabling ordered reductions for the target.
818 bool enableOrderedReductions() const;
819
820 /// Return true if the target has a unified operation to calculate division
821 /// and remainder. If so, the additional implicit multiplication and
822 /// subtraction required to calculate a remainder from division are free. This
823 /// can enable more aggressive transformations for division and remainder than
824 /// would typically be allowed using throughput or size cost models.
825 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
826
827 /// Return true if the given instruction (assumed to be a memory access
828 /// instruction) has a volatile variant. If that's the case then we can avoid
829 /// addrspacecast to generic AS for volatile loads/stores. Default
830 /// implementation returns false, which prevents address space inference for
831 /// volatile loads/stores.
832 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
833
834 /// Return true if target doesn't mind addresses in vectors.
835 bool prefersVectorizedAddressing() const;
836
837 /// Return the cost of the scaling factor used in the addressing
838 /// mode represented by AM for this target, for a load/store
839 /// of the specified type.
840 /// If the AM is supported, the return value must be >= 0.
841 /// If the AM is not supported, it returns a negative value.
842 /// TODO: Handle pre/postinc as well.
843 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
844 StackOffset BaseOffset, bool HasBaseReg,
845 int64_t Scale,
846 unsigned AddrSpace = 0) const;
847
848 /// Return true if the loop strength reduce pass should make
849 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
850 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
851 /// immediate offset and no index register.
852 bool LSRWithInstrQueries() const;
853
854 /// Return true if it's free to truncate a value of type Ty1 to type
855 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
856 /// by referencing its sub-register AX.
857 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
858
859 /// Return true if it is profitable to hoist an instruction from the
860 /// then/else blocks to before the if.
861 bool isProfitableToHoist(Instruction *I) const;
862
863 bool useAA() const;
864
865 /// Return true if this type is legal.
866 bool isTypeLegal(Type *Ty) const;
867
868 /// Returns the estimated number of registers required to represent \p Ty.
869 unsigned getRegUsageForType(Type *Ty) const;
870
871 /// Return true if switches should be turned into lookup tables for the
872 /// target.
873 bool shouldBuildLookupTables() const;
874
875 /// Return true if switches should be turned into lookup tables
876 /// containing this constant value for the target.
877 bool shouldBuildLookupTablesForConstant(Constant *C) const;
878
879 /// Return true if lookup tables should be turned into relative lookup tables.
880 bool shouldBuildRelLookupTables() const;
881
882 /// Return true if the input function which is cold at all call sites,
883 /// should use coldcc calling convention.
884 bool useColdCCForColdCall(Function &F) const;
885
886 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
887 /// are set if the demanded result elements need to be inserted and/or
888 /// extracted from vectors.
889 InstructionCost getScalarizationOverhead(VectorType *Ty,
890 const APInt &DemandedElts,
891 bool Insert, bool Extract,
892 TTI::TargetCostKind CostKind) const;
893
894 /// Estimate the overhead of scalarizing an instruction's unique
895 /// non-constant operands. The (potentially vector) types to use for each
896 /// argument are passed via Tys.
897 InstructionCost
898 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
899 ArrayRef<Type *> Tys,
900 TTI::TargetCostKind CostKind) const;
901
902 /// If target has efficient vector element load/store instructions, it can
903 /// return true here so that insertion/extraction costs are not added to
904 /// the scalarization cost of a load/store.
905 bool supportsEfficientVectorElementLoadStore() const;
906
907 /// If the target supports tail calls.
908 bool supportsTailCalls() const;
909
910 /// If target supports tail call on \p CB
911 bool supportsTailCallFor(const CallBase *CB) const;
912
913 /// Don't restrict interleaved unrolling to small loops.
914 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
915
916 /// Returns options for expansion of memcmp. IsZeroCmp is
917 // true if this is the expansion of memcmp(p1, p2, s) == 0.
918 struct MemCmpExpansionOptions {
919 // Return true if memcmp expansion is enabled.
920 operator bool() const { return MaxNumLoads > 0; }
921
922 // Maximum number of load operations.
923 unsigned MaxNumLoads = 0;
924
925 // The list of available load sizes (in bytes), sorted in decreasing order.
926 SmallVector<unsigned, 8> LoadSizes;
927
928 // For memcmp expansion when the memcmp result is only compared equal or
929 // not-equal to 0, allow up to this number of load pairs per block. As an
930 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
931 // a0 = load2bytes &a[0]
932 // b0 = load2bytes &b[0]
933 // a2 = load1byte &a[2]
934 // b2 = load1byte &b[2]
935 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
936 unsigned NumLoadsPerBlock = 1;
937
938 // Set to true to allow overlapping loads. For example, 7-byte compares can
939 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
940 // requires all loads in LoadSizes to be doable in an unaligned way.
941 bool AllowOverlappingLoads = false;
942
943 // Sometimes, the amount of data that needs to be compared is smaller than
944 // the standard register size, but it cannot be loaded with just one load
945 // instruction. For example, if the size of the memory comparison is 6
946 // bytes, we can handle it more efficiently by loading all 6 bytes in a
947 // single block and generating an 8-byte number, instead of generating two
948 // separate blocks with conditional jumps for 4 and 2 byte loads. This
949 // approach simplifies the process and produces the comparison result as
950 // normal. This array lists the allowed sizes of memcmp tails that can be
951 // merged into one block.
952 SmallVector<unsigned, 4> AllowedTailExpansions;
953 };
954 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
955 bool IsZeroCmp) const;
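// A minimal sketch of what a target-side implementation might return; the
// concrete numbers are illustrative assumptions, not any backend's policy:
//
//   TTI::MemCmpExpansionOptions Options;
//   Options.MaxNumLoads = 4;
//   Options.LoadSizes = {8, 4, 2, 1}; // prefer wide loads, fall back to bytes
//   if (IsZeroCmp)
//     Options.AllowOverlappingLoads = true;
//   return Options;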
956
957 /// Should the Select Optimization pass be enabled and run.
958 bool enableSelectOptimize() const;
959
960 /// Should the Select Optimization pass treat the given instruction like a
961 /// select, potentially converting it to a conditional branch. This can
962 /// include select-like instructions like or(zext(c), x) that can be converted
963 /// to selects.
964 bool shouldTreatInstructionLikeSelect(const Instruction *I) const;
965
966 /// Enable matching of interleaved access groups.
967 bool enableInterleavedAccessVectorization() const;
968
969 /// Enable matching of interleaved access groups that contain predicated
970 /// accesses or gaps and are therefore vectorized using masked
971 /// vector loads/stores.
972 bool enableMaskedInterleavedAccessVectorization() const;
973
974 /// Indicate that it is potentially unsafe to automatically vectorize
975 /// floating-point operations because the semantics of vector and scalar
976 /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
977 /// does not support IEEE-754 denormal numbers, while depending on the
978 /// platform, scalar floating-point math does.
979 /// This applies to floating-point math operations and calls, not memory
980 /// operations, shuffles, or casts.
981 bool isFPVectorizationPotentiallyUnsafe() const;
982
983 /// Determine if the target supports unaligned memory accesses.
984 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
985 unsigned AddressSpace = 0,
986 Align Alignment = Align(1),
987 unsigned *Fast = nullptr) const;
988
989 /// Return hardware support for population count.
990 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
991
992 /// Return true if the hardware has a fast square-root instruction.
993 bool haveFastSqrt(Type *Ty) const;
994
995 /// Return true if the cost of the instruction is too high to speculatively
996 /// execute and should be kept behind a branch.
997 /// This normally just wraps around a getInstructionCost() call, but some
998 /// targets might report a low TCK_SizeAndLatency value that is incompatible
999 /// with the fixed TCC_Expensive value.
1000 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
1001 bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;
1002
1003 /// Return true if it is faster to check if a floating-point value is NaN
1004 /// (or not-NaN) versus a comparison against a constant FP zero value.
1005 /// Targets should override this if materializing a 0.0 for comparison is
1006 /// generally as cheap as checking for ordered/unordered.
1007 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
1008
1009 /// Return the expected cost of supporting the floating point operation
1010 /// of the specified type.
1011 InstructionCost getFPOpCost(Type *Ty) const;
1012
1013 /// Return the expected cost of materializing for the given integer
1014 /// immediate of the specified type.
1015 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1016 TargetCostKind CostKind) const;
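// A minimal usage sketch: query how expensive it is to materialize the 64-bit
// immediate 0x12345678, assuming a TargetTransformInfo reference TTI and an
// LLVMContext Ctx:
//
//   APInt Imm(/*numBits=*/64, 0x12345678);
//   InstructionCost ImmCost = TTI.getIntImmCost(
//       Imm, Type::getInt64Ty(Ctx), TargetTransformInfo::TCK_CodeSize);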
1017
1018 /// Return the expected cost of materialization for the given integer
1019 /// immediate of the specified type for a given instruction. The cost can be
1020 /// zero if the immediate can be folded into the specified instruction.
1021 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1022 const APInt &Imm, Type *Ty,
1023 TargetCostKind CostKind,
1024 Instruction *Inst = nullptr) const;
1025 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1026 const APInt &Imm, Type *Ty,
1027 TargetCostKind CostKind) const;
1028
1029 /// Return the expected cost for the given integer when optimising
1030 /// for size. This is different than the other integer immediate cost
1031 /// functions in that it is subtarget agnostic. This is useful when you e.g.
1032 /// target one ISA such as Aarch32 but smaller encodings could be possible
1033 /// with another such as Thumb. This return value is used as a penalty when
1034 /// the total costs for a constant is calculated (the bigger the cost, the
1035 /// more beneficial constant hoisting is).
1036 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1037 const APInt &Imm, Type *Ty) const;
1038
1039 /// It can be advantageous to detach complex constants from their uses to make
1040 /// their generation cheaper. This hook allows targets to report when such
1041 /// transformations might negatively affect the code generation of the
1042 /// underlying operation. The motivating example is divides whereby hoisting
1043 /// constants prevents the code generator's ability to transform them into
1044 /// combinations of simpler operations.
1045 bool preferToKeepConstantsAttached(const Instruction &Inst,
1046 const Function &Fn) const;
1047
1048 /// @}
1049
1050 /// \name Vector Target Information
1051 /// @{
1052
1053 /// The various kinds of shuffle patterns for vector queries.
1054 enum ShuffleKind {
1055 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1056 SK_Reverse, ///< Reverse the order of the vector.
1057 SK_Select, ///< Selects elements from the corresponding lane of
1058 ///< either source operand. This is equivalent to a
1059 ///< vector select with a constant condition operand.
1060 SK_Transpose, ///< Transpose two vectors.
1061 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1062 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1063 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1064 ///< with any shuffle mask.
1065 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1066 ///< shuffle mask.
1067 SK_Splice ///< Concatenates elements from the first input vector
1068 ///< with elements of the second input vector. Returning
1069 ///< a vector of the same type as the input vectors.
1070 ///< Index indicates start offset in first input vector.
1071 };
1072
1073 /// Additional information about an operand's possible values.
1074 enum OperandValueKind {
1075 OK_AnyValue, // Operand can have any value.
1076 OK_UniformValue, // Operand is uniform (splat of a value).
1077 OK_UniformConstantValue, // Operand is uniform constant.
1078 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1079 };
1080
1081 /// Additional properties of an operand's values.
1082 enum OperandValueProperties {
1083 OP_None = 0,
1084 OP_PowerOf2 = 1,
1085 OP_NegatedPowerOf2 = 2,
1086 };
1087
1088 // Describe the values an operand can take. We're in the process
1089 // of migrating uses of OperandValueKind and OperandValueProperties
1090 // to use this class, and then will change the internal representation.
1091 struct OperandValueInfo {
1092 OperandValueKind Kind = OK_AnyValue;
1093 OperandValueProperties Properties = OP_None;
1094
1095 bool isConstant() const {
1096 return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
1097 }
1098 bool isUniform() const {
1099 return Kind == OK_UniformValue || Kind == OK_UniformConstantValue;
1100 }
1101 bool isPowerOf2() const {
1102 return Properties == OP_PowerOf2;
1103 }
1104 bool isNegatedPowerOf2() const {
1105 return Properties == OP_NegatedPowerOf2;
1106 }
1107
1108 OperandValueInfo getNoProps() const {
1109 return {Kind, OP_None};
1110 }
1111 };
1112
1113 /// \return the number of registers in the target-provided register class.
1114 unsigned getNumberOfRegisters(unsigned ClassID) const;
1115
1116 /// \return the target-provided register class ID for the provided type,
1117 /// accounting for type promotion and other type-legalization techniques that
1118 /// the target might apply. However, it specifically does not account for the
1119 /// scalarization or splitting of vector types. Should a vector type require
1120 /// scalarization or splitting into multiple underlying vector registers, that
1121 /// type should be mapped to a register class containing no registers.
1122 /// Specifically, this is designed to provide a simple, high-level view of the
1123 /// register allocation later performed by the backend. These register classes
1124 /// don't necessarily map onto the register classes used by the backend.
1125 /// FIXME: It's not currently possible to determine how many registers
1126 /// are used by the provided type.
1127 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1128
1129 /// \return the target-provided register class name
1130 const char *getRegisterClassName(unsigned ClassID) const;
1131
1132 enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
1133
1134 /// \return The width of the largest scalar or vector register type.
1135 TypeSize getRegisterBitWidth(RegisterKind K) const;
1136
1137 /// \return The width of the smallest vector register type.
1138 unsigned getMinVectorRegisterBitWidth() const;
1139
1140 /// \return The maximum value of vscale if the target specifies an
1141 /// architectural maximum vector length, and std::nullopt otherwise.
1142 std::optional<unsigned> getMaxVScale() const;
1143
1144 /// \return the value of vscale to tune the cost model for.
1145 std::optional<unsigned> getVScaleForTuning() const;
1146
1147 /// \return true if vscale is known to be a power of 2
1148 bool isVScaleKnownToBeAPowerOfTwo() const;
1149
1150 /// \return True if the vectorization factor should be chosen to
1151 /// make the vector of the smallest element type match the size of a
1152 /// vector register. For wider element types, this could result in
1153 /// creating vectors that span multiple vector registers.
1154 /// If false, the vectorization factor will be chosen based on the
1155 /// size of the widest element type.
1156 /// \p K Register Kind for vectorization.
1157 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
1158
1159 /// \return The minimum vectorization factor for types of given element
1160 /// bit width, or 0 if there is no minimum VF. The returned value only
1161 /// applies when shouldMaximizeVectorBandwidth returns true.
1162 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1163 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1164
1165 /// \return The maximum vectorization factor for types of given element
1166 /// bit width and opcode, or 0 if there is no maximum VF.
1167 /// Currently only used by the SLP vectorizer.
1168 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1169
1170 /// \return The minimum vectorization factor for the store instruction. Given
1171 /// the initial estimation of the minimum vector factor and store value type,
1172 /// it tries to find the lowest possible VF which still might be profitable for
1173 /// the vectorization.
1174 /// \param VF Initial estimation of the minimum vector factor.
1175 /// \param ScalarMemTy Scalar memory type of the store operation.
1176 /// \param ScalarValTy Scalar type of the stored value.
1177 /// Currently only used by the SLP vectorizer.
1178 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1179 Type *ScalarValTy) const;
1180
1181 /// \return True if it should be considered for address type promotion.
1182 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1183 /// profitable without finding other extensions fed by the same input.
1184 bool shouldConsiderAddressTypePromotion(
1185 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1186
1187 /// \return The size of a cache line in bytes.
1188 unsigned getCacheLineSize() const;
1189
1190 /// The possible cache levels
1191 enum class CacheLevel {
1192 L1D, // The L1 data cache
1193 L2D, // The L2 data cache
1194
1195 // We currently do not model L3 caches, as their sizes differ widely between
1196 // microarchitectures. Also, we currently do not have a use for L3 cache
1197 // size modeling yet.
1198 };
1199
1200 /// \return The size of the cache level in bytes, if available.
1201 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1202
1203 /// \return The associativity of the cache level, if available.
1204 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1205
1206 /// \return The minimum architectural page size for the target.
1207 std::optional<unsigned> getMinPageSize() const;
1208
1209 /// \return How much before a load we should place the prefetch
1210 /// instruction. This is currently measured in number of
1211 /// instructions.
1212 unsigned getPrefetchDistance() const;
1213
1214 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1215 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1216 /// and the arguments provided are meant to serve as a basis for deciding this
1217 /// for a particular loop.
1218 ///
1219 /// \param NumMemAccesses Number of memory accesses in the loop.
1220 /// \param NumStridedMemAccesses Number of the memory accesses that
1221 /// ScalarEvolution could find a known stride
1222 /// for.
1223 /// \param NumPrefetches Number of software prefetches that will be
1224 /// emitted as determined by the addresses
1225 /// involved and the cache line size.
1226 /// \param HasCall True if the loop contains a call.
1227 ///
1228 /// \return This is the minimum stride in bytes where it makes sense to start
1229 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1230 /// stride.
1231 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1232 unsigned NumStridedMemAccesses,
1233 unsigned NumPrefetches, bool HasCall) const;
1234
1235 /// \return The maximum number of iterations to prefetch ahead. If
1236 /// the required number of iterations is more than this number, no
1237 /// prefetching is performed.
1238 unsigned getMaxPrefetchIterationsAhead() const;
1239
1240 /// \return True if prefetching should also be done for writes.
1241 bool enableWritePrefetching() const;
1242
1243 /// \return true if the target wants to issue a prefetch in address space \p AS.
1244 bool shouldPrefetchAddressSpace(unsigned AS) const;
1245
1246 /// \return The maximum interleave factor that any transform should try to
1247 /// perform for this target. This number depends on the level of parallelism
1248 /// and the number of execution units in the CPU.
1249 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1250
1251 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1252 static OperandValueInfo getOperandInfo(const Value *V);
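// A minimal usage sketch, assuming I is a binary Instruction being costed:
//
//   TargetTransformInfo::OperandValueInfo RHSInfo =
//       TargetTransformInfo::getOperandInfo(I.getOperand(1));
//   if (RHSInfo.isConstant() && RHSInfo.isPowerOf2())
//     ; // e.g. a udiv by a uniform power-of-two is usually much cheaper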
1253
1254 /// This is an approximation of reciprocal throughput of a math/logic op.
1255 /// A higher cost indicates less expected throughput.
1256 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1257 /// clock cycles per instruction when the instructions are not part of a
1258 /// limiting dependency chain."
1259 /// Therefore, costs should be scaled to account for multiple execution units
1260 /// on the target that can process this type of instruction. For example, if
1261 /// there are 5 scalar integer units and 2 vector integer units that can
1262 /// calculate an 'add' in a single cycle, this model should indicate that the
1263 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1264 /// add instruction.
1265 /// \p Args is an optional argument which holds the instruction operands
1266 /// values so the TTI can analyze those values searching for special
1267 /// cases or optimizations based on those values.
1268 /// \p CxtI is the optional original context instruction, if one exists, to
1269 /// provide even more information.
1270 /// \p TLibInfo is used to search for platform specific vector library
1271 /// functions for instructions that might be converted to calls (e.g. frem).
1272 InstructionCost getArithmeticInstrCost(
1273 unsigned Opcode, Type *Ty,
1274 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1275 TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
1276 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1277 ArrayRef<const Value *> Args = std::nullopt,
1278 const Instruction *CxtI = nullptr,
1279 const TargetLibraryInfo *TLibInfo = nullptr) const;
1280
1281 /// Returns the cost estimation for alternating opcode pattern that can be
1282 /// lowered to a single instruction on the target. In X86 this is for the
1283 /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in
1284 /// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
1285 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1286 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
1287 /// \p VecTy is the vector type of the instruction to be generated.
1288 InstructionCost getAltInstrCost(
1289 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1290 const SmallBitVector &OpcodeMask,
1291 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1292
1293 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1294 /// The exact mask may be passed as Mask, or else the array will be empty.
1295 /// The index and subtype parameters are used by the subvector insertion and
1296 /// extraction shuffle kinds to show the insert/extract point and the type of
1297 /// the subvector being inserted/extracted. The operands of the shuffle can be
1298 /// passed through \p Args, which helps improve the cost estimation in some
1299 /// cases, like in broadcast loads.
1300 /// NOTE: For subvector extractions Tp represents the source type.
1301 InstructionCost getShuffleCost(
1302 ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
1303 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
1304 VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
1305 const Instruction *CxtI = nullptr) const;
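// A minimal usage sketch: cost of reversing a vector value of type VecTy,
// assuming a TargetTransformInfo reference TTI (all other parameters take
// their defaults):
//
//   InstructionCost RevCost =
//       TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VecTy);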
1306
1307 /// Represents a hint about the context in which a cast is used.
1308 ///
1309 /// For zext/sext, the context of the cast is the operand, which must be a
1310 /// load of some kind. For trunc, the context of the cast is the single
1311 /// user of the instruction, which must be a store of some kind.
1312 ///
1313 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1314 /// type of cast it's dealing with, as not every cast is equal. For instance,
1315 /// the zext of a load may be free, but the zext of an interleaving load can
1316 /// be (very) expensive!
1317 ///
1318 /// See \c getCastContextHint to compute a CastContextHint from a cast
1319 /// Instruction*. Callers can use it if they don't need to override the
1320 /// context and just want it to be calculated from the instruction.
1321 ///
1322 /// FIXME: This handles the types of load/store that the vectorizer can
1323 /// produce, which are the cases where the context instruction is most
1324 /// likely to be incorrect. There are other situations where that can happen
1325 /// too, which might be handled here but in the long run a more general
1326 /// solution of costing multiple instructions at the same time may be better.
1327 enum class CastContextHint : uint8_t {
1328 None, ///< The cast is not used with a load/store of any kind.
1329 Normal, ///< The cast is used with a normal load/store.
1330 Masked, ///< The cast is used with a masked load/store.
1331 GatherScatter, ///< The cast is used with a gather/scatter.
1332 Interleave, ///< The cast is used with an interleaved load/store.
1333 Reversed, ///< The cast is used with a reversed load/store.
1334 };
1335
1336 /// Calculates a CastContextHint from \p I.
1337 /// This should be used by callers of getCastInstrCost if they wish to
1338 /// determine the context from some instruction.
1339 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1340 /// or if it's another type of cast.
1341 static CastContextHint getCastContextHint(const Instruction *I);
1342
1343 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1344 /// zext, etc. If there is an existing instruction that holds Opcode, it
1345 /// may be passed in the 'I' parameter.
1347 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1350 const Instruction *I = nullptr) const;
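// Illustrative sketch (not part of this interface): costing a vector zext,
// deriving the context hint from an existing cast instruction. Assumes a
// TargetTransformInfo &TTI, an LLVMContext &Ctx, and an Instruction *ZExtI
// pointing at the cast; the types are example values.
TargetTransformInfo::CastContextHint Hint =
    TargetTransformInfo::getCastContextHint(ZExtI);
auto *NarrowTy = FixedVectorType::get(Type::getInt8Ty(Ctx), 16);
auto *WideTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 16);
InstructionCost ZExtCost =
    TTI.getCastInstrCost(Instruction::ZExt, WideTy, NarrowTy, Hint,
                         TargetTransformInfo::TCK_RecipThroughput, ZExtI);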
1351
1352 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1353 /// Index = -1 to indicate that there is no information about the index value.
1354 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1355 VectorType *VecTy,
1356 unsigned Index) const;
1357
1358 /// \return The expected cost of control-flow related instructions such as
1359 /// Phi, Ret, Br, Switch.
1361 getCFInstrCost(unsigned Opcode,
1363 const Instruction *I = nullptr) const;
1364
1365 /// \returns The expected cost of compare and select instructions. If there
1366 /// is an existing instruction that holds Opcode, it may be passed in the
1367 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1368 /// is using a compare with the specified predicate as condition. When vector
1369 /// types are passed, \p VecPred must be used for all lanes.
1371 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1372 CmpInst::Predicate VecPred,
1374 const Instruction *I = nullptr) const;
1375
1376 /// \return The expected cost of vector Insert and Extract.
1377 /// Use -1 to indicate that there is no information on the index value.
1378 /// This is used when the instruction is not available; a typical use
1379 /// case is to provision the cost of vectorization/scalarization in
1380 /// vectorizer passes.
1381 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1383 unsigned Index = -1, Value *Op0 = nullptr,
1384 Value *Op1 = nullptr) const;
1385
1386 /// \return The expected cost of vector Insert and Extract.
1387 /// This is used when the instruction is available, and the implementation
1388 /// asserts that 'I' is not nullptr.
1389 ///
1390 /// A typical suitable use case is cost estimation when vector instruction
1391 /// exists (e.g., from basic blocks during transformation).
1394 unsigned Index = -1) const;
1395
1396 /// \return The cost of a replication shuffle that replicates \p VF elements
1397 /// of type \p EltTy \p ReplicationFactor times.
1398 ///
1399 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1400 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1401 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1402 int VF,
1403 const APInt &DemandedDstElts,
1405
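// Illustrative sketch (not part of this interface): costing the
// <0,0,0,1,1,1,2,2,2,3,3,3> replication mask described above with every
// destination element demanded. Assumes a TargetTransformInfo &TTI and an
// LLVMContext &Ctx.
APInt DemandedDst = APInt::getAllOnes(/*numBits=*/3 * 4);
InstructionCost ReplCost = TTI.getReplicationShuffleCost(
    Type::getInt32Ty(Ctx), /*ReplicationFactor=*/3, /*VF=*/4, DemandedDst,
    TargetTransformInfo::TCK_RecipThroughput);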
1406 /// \return The cost of Load and Store instructions.
1408 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1409 unsigned AddressSpace,
1411 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1412 const Instruction *I = nullptr) const;
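// Illustrative sketch (not part of this interface): costing a plain 16-byte
// aligned vector load from address space 0, relying on the defaulted operand
// info and context instruction. Assumes a TargetTransformInfo &TTI and an
// LLVMContext &Ctx.
auto *LoadTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
InstructionCost LoadCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy,
                                               Align(16), /*AddressSpace=*/0);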
1413
1414 /// \return The cost of VP Load and Store instructions.
1415 InstructionCost
1416 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1417 unsigned AddressSpace,
1419 const Instruction *I = nullptr) const;
1420
1421 /// \return The cost of masked Load and Store instructions.
1423 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1425
1426 /// \return The cost of a Gather or Scatter operation.
1427 /// \p Opcode - the kind of memory access, Load or Store
1428 /// \p DataTy - the vector type of the data to be loaded or stored
1429 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1430 /// \p VariableMask - true when the memory access is predicated with a mask
1431 /// that is not a compile-time constant
1432 /// \p Alignment - alignment of a single element
1433 /// \p I - the optional original context instruction, if one exists, e.g. the
1434 /// load/store to transform or the call to the gather/scatter intrinsic
1436 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1438 const Instruction *I = nullptr) const;
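// Illustrative sketch (not part of this interface): costing a gather of four
// i32 elements through a vector of pointers under a variable (non-constant)
// mask. Assumes a TargetTransformInfo &TTI, an LLVMContext &Ctx, and a
// Value *PtrVec holding the vector of addresses.
auto *GatherTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
InstructionCost GatherCost = TTI.getGatherScatterOpCost(
    Instruction::Load, GatherTy, PtrVec, /*VariableMask=*/true, Align(4));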
1439
1440 /// \return The cost of strided memory operations.
1441 /// \p Opcode - the kind of memory access, Load or Store
1442 /// \p DataTy - the vector type of the data to be loaded or stored
1443 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1444 /// \p VariableMask - true when the memory access is predicated with a mask
1445 /// that is not a compile-time constant
1446 /// \p Alignment - alignment of a single element
1447 /// \p I - the optional original context instruction, if one exists, e.g. the
1448 /// load/store to transform or the call to the gather/scatter intrinsic
1450 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1452 const Instruction *I = nullptr) const;
1453
1454 /// \return The cost of the interleaved memory operation.
1455 /// \p Opcode is the memory operation code
1456 /// \p VecTy is the vector type of the interleaved access.
1457 /// \p Factor is the interleave factor
1458 /// \p Indices is the indices for interleaved load members (as interleaved
1459 /// load allows gaps)
1460 /// \p Alignment is the alignment of the memory operation
1461 /// \p AddressSpace is address space of the pointer.
1462 /// \p UseMaskForCond indicates if the memory access is predicated.
1463 /// \p UseMaskForGaps indicates if gaps should be masked.
1465 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1466 Align Alignment, unsigned AddressSpace,
1468 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
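// Illustrative sketch (not part of this interface): costing a stride-2
// interleaved load group that uses both members, i.e. a wide <8 x float> load
// de-interleaved into two <4 x float> values. Assumes a TargetTransformInfo
// &TTI and an LLVMContext &Ctx.
auto *WideVecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
unsigned InterleaveIndices[] = {0, 1};
InstructionCost InterleaveCost = TTI.getInterleavedMemoryOpCost(
    Instruction::Load, WideVecTy, /*Factor=*/2, InterleaveIndices, Align(16),
    /*AddressSpace=*/0);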
1469
1470 /// A helper function to determine the type of reduction algorithm used
1471 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1472 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1473 return FMF && !(*FMF).allowReassoc();
1474 }
1475
1476 /// Calculate the cost of vector reduction intrinsics.
1477 ///
1478 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1479 /// value using the operation denoted by \p Opcode. The FastMathFlags
1480 /// parameter \p FMF indicates what type of reduction we are performing:
1481 /// 1. Tree-wise. This is the typical 'fast' reduction, which
1482 /// involves successively splitting a vector in half and doing the
1483 /// operation on the pair of halves until you have a scalar value. For
1484 /// example:
1485 /// (v0, v1, v2, v3)
1486 /// ((v0+v2), (v1+v3), undef, undef)
1487 /// ((v0+v2+v1+v3), undef, undef, undef)
1488 /// This is the default behaviour for integer operations, whereas for
1489 /// floating point we only do this if \p FMF indicates that
1490 /// reassociation is allowed.
1491 /// 2. Ordered. For a vector with N elements this involves performing N
1492 /// operations in lane order, starting with an initial scalar value, i.e.
1493 /// result = InitVal + v0
1494 /// result = result + v1
1495 /// result = result + v2
1496 /// result = result + v3
1497 /// This is only the case for FP operations and when reassociation is not
1498 /// allowed.
1499 ///
1501 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1503
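// Illustrative sketch (not part of this interface): comparing the cost of a
// reassociable (tree-wise) float add reduction against the strictly ordered
// form. Assumes a TargetTransformInfo &TTI and an LLVMContext &Ctx.
auto *RdxTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
FastMathFlags ReassocFMF;
ReassocFMF.setAllowReassoc(); // permits the tree-wise scheme
InstructionCost FastRdx = TTI.getArithmeticReductionCost(
    Instruction::FAdd, RdxTy, ReassocFMF,
    TargetTransformInfo::TCK_RecipThroughput);
// Default-constructed flags have allowReassoc() == false, so
// requiresOrderedReduction() returns true and the lane-ordered form is costed.
InstructionCost OrderedRdx = TTI.getArithmeticReductionCost(
    Instruction::FAdd, RdxTy, FastMathFlags(),
    TargetTransformInfo::TCK_RecipThroughput);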
1507
1508 /// Calculate the cost of an extended reduction pattern, similar to
1509 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1510 /// extensions. This is the cost of:
1511 /// ResTy vecreduce.add(mul (A, B)).
1512 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1514 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1516
1517 /// Calculate the cost of an extended reduction pattern, similar to
1518 /// getArithmeticReductionCost of a reduction with an extension.
1519 /// This is the cost of:
1520 /// ResTy vecreduce.opcode(ext(Ty A)).
1522 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1523 FastMathFlags FMF,
1525
1526 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1527 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1528 /// 3. scalar instruction which is to be vectorized.
1531
1532 /// \returns The cost of Call instructions.
1536
1537 /// \returns The number of pieces into which the provided type must be
1538 /// split during legalization. Zero is returned when the answer is unknown.
1539 unsigned getNumberOfParts(Type *Tp) const;
1540
1541 /// \returns The cost of the address computation. For most targets this can be
1542 /// merged into the instruction indexing mode. Some targets might want to
1543 /// distinguish between address computation for memory operations on vector
1544 /// types and scalar types. Such targets should override this function.
1545 /// The 'SE' parameter holds a pointer to the scalar evolution object which
1546 /// is used to get the step value of 'Ptr' in the case of a constant stride.
1547 /// The 'Ptr' parameter holds SCEV of the access pointer.
1549 ScalarEvolution *SE = nullptr,
1550 const SCEV *Ptr = nullptr) const;
1551
1552 /// \returns The cost, if any, of keeping values of the given types alive
1553 /// over a callsite.
1554 ///
1555 /// Some types may require the use of register classes that do not have
1556 /// any callee-saved registers, so would require a spill and fill.
1558
1559 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1560 /// will contain additional information - whether the intrinsic may read
1561 /// or write memory, its volatility, and the pointer. Info is undefined
1562 /// if false is returned.
1564
1565 /// \returns The maximum element size, in bytes, for an element
1566 /// unordered-atomic memory intrinsic.
1567 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1568
1569 /// \returns A value which is the result of the given memory intrinsic. New
1570 /// instructions may be created to extract the result from the given intrinsic
1571 /// memory operation. Returns nullptr if the target cannot create a result
1572 /// from the given intrinsic.
1574 Type *ExpectedType) const;
1575
1576 /// \returns The type to use in a loop expansion of a memcpy call.
1578 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1579 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1580 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1581
1582 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1583 /// \param RemainingBytes The number of bytes to copy.
1584 ///
1585 /// Calculates the operand types to use when copying \p RemainingBytes of
1586 /// memory, where source and destination alignments are \p SrcAlign and
1587 /// \p DestAlign respectively.
1589 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1590 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1591 unsigned SrcAlign, unsigned DestAlign,
1592 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
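// Illustrative sketch (not part of this interface): choosing operand types for
// an expanded memcpy and for its residual tail. Assumes a TargetTransformInfo
// &TTI, an LLVMContext &Ctx, and a Value *Length holding the copy size; the
// address spaces and alignments are example values.
Type *CopyOpTy = TTI.getMemcpyLoopLoweringType(
    Ctx, Length, /*SrcAddrSpace=*/0, /*DestAddrSpace=*/0, /*SrcAlign=*/16,
    /*DestAlign=*/16);
SmallVector<Type *, 4> ResidualOps;
TTI.getMemcpyLoopResidualLoweringType(
    ResidualOps, Ctx, /*RemainingBytes=*/7, /*SrcAddrSpace=*/0,
    /*DestAddrSpace=*/0, /*SrcAlign=*/16, /*DestAlign=*/16);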
1593
1594 /// \returns True if the two functions have compatible attributes for inlining
1595 /// purposes.
1596 bool areInlineCompatible(const Function *Caller,
1597 const Function *Callee) const;
1598
1599 /// Returns a penalty for invoking call \p Call in \p F.
1600 /// For example, if a function F calls a function G, which in turn calls
1601 /// function H, then getInlineCallPenalty(F, H()) would return the
1602 /// penalty of calling H from F, e.g. after inlining G into F.
1603 /// \p DefaultCallPenalty is passed to give a default penalty that
1604 /// the target can amend or override.
1605 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1606 unsigned DefaultCallPenalty) const;
1607
1608 /// \returns True if the caller and callee agree on how \p Types will be
1609 /// passed to or returned from the callee.
1611 /// \param Types List of types to check.
1612 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1613 const ArrayRef<Type *> &Types) const;
1614
1615 /// The type of load/store indexing.
1617 MIM_Unindexed, ///< No indexing.
1618 MIM_PreInc, ///< Pre-incrementing.
1619 MIM_PreDec, ///< Pre-decrementing.
1620 MIM_PostInc, ///< Post-incrementing.
1621 MIM_PostDec ///< Post-decrementing.
1623
1624 /// \returns True if the specified indexed load for the given type is legal.
1625 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1626
1627 /// \returns True if the specified indexed store for the given type is legal.
1628 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1629
1630 /// \returns The bitwidth of the largest vector type that should be used to
1631 /// load/store in the given address space.
1632 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1633
1634 /// \returns True if the load instruction is legal to vectorize.
1635 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1636
1637 /// \returns True if the store instruction is legal to vectorize.
1638 bool isLegalToVectorizeStore(StoreInst *SI) const;
1639
1640 /// \returns True if it is legal to vectorize the given load chain.
1641 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1642 unsigned AddrSpace) const;
1643
1644 /// \returns True if it is legal to vectorize the given store chain.
1645 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1646 unsigned AddrSpace) const;
1647
1648 /// \returns True if it is legal to vectorize the given reduction kind.
1650 ElementCount VF) const;
1651
1652 /// \returns True if the given type is supported for scalable vectors
1654
1655 /// \returns The new vector factor value if the target doesn't support \p
1656 /// SizeInBytes loads or has a better vector factor.
1657 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1658 unsigned ChainSizeInBytes,
1659 VectorType *VecTy) const;
1660
1661 /// \returns The new vector factor value if the target doesn't support \p
1662 /// SizeInBytes stores or has a better vector factor.
1663 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1664 unsigned ChainSizeInBytes,
1665 VectorType *VecTy) const;
1666
1667 /// Flags describing the kind of vector reduction.
1669 ReductionFlags() = default;
1670 bool IsMaxOp =
1671 false; ///< If the op is a min/max kind, true if it's a max operation.
1672 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1673 bool NoNaN =
1674 false; ///< If op is an fp min/max, whether NaNs may be present.
1675 };
1676
1677 /// \returns True if the target prefers in-loop reductions.
1678 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1679 ReductionFlags Flags) const;
1680
1681 /// \returns True if the target prefers the reduction select to be kept in
1682 /// the loop when tail folding, i.e.
1683 /// loop:
1684 /// p = phi (0, s)
1685 /// a = add (p, x)
1686 /// s = select (mask, a, p)
1687 /// vecreduce.add(s)
1688 ///
1689 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1690 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1691 /// by the target, this can lead to cleaner code generation.
1692 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1693 ReductionFlags Flags) const;
1694
1695 /// Return true if the loop vectorizer should consider vectorizing an
1696 /// otherwise scalar epilogue loop.
1697 bool preferEpilogueVectorization() const;
1698
1699 /// \returns True if the target wants to expand the given reduction intrinsic
1700 /// into a shuffle sequence.
1701 bool shouldExpandReduction(const IntrinsicInst *II) const;
1702
1703 /// \returns the size cost of rematerializing a GlobalValue address relative
1704 /// to a stack reload.
1705 unsigned getGISelRematGlobalCost() const;
1706
1707 /// \returns the lower bound of a trip count to decide on vectorization
1708 /// while tail-folding.
1709 unsigned getMinTripCountTailFoldingThreshold() const;
1710
1711 /// \returns True if the target supports scalable vectors.
1712 bool supportsScalableVectors() const;
1713
1714 /// \return true when scalable vectorization is preferred.
1715 bool enableScalableVectorization() const;
1716
1717 /// \name Vector Predication Information
1718 /// @{
1719 /// Whether the target supports the %evl parameter of VP intrinsics efficiently
1720 /// in hardware, for the given opcode and type/alignment (see LLVM Language
1721 /// Reference - "Vector Predication Intrinsics").
1722 /// Use of %evl is discouraged when that is not the case.
1723 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1724 Align Alignment) const;
1725
1728 // keep the predicating parameter
1730 // where legal, discard the predicate parameter
1732 // transform into something else that is also predicating
1733 Convert = 2
1735
1736 // How to transform the EVL parameter.
1737 // Legal: keep the EVL parameter as it is.
1738 // Discard: Ignore the EVL parameter where it is safe to do so.
1739 // Convert: Fold the EVL into the mask parameter.
1741
1742 // How to transform the operator.
1743 // Legal: The target supports this operator.
1744 // Convert: Convert this to a non-VP operation.
1745 // The 'Discard' strategy is invalid.
1747
1748 bool shouldDoNothing() const {
1749 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1750 }
1753 };
1754
1755 /// \returns How the target needs this vector-predicated operation to be
1756 /// transformed.
1758 /// @}
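// Illustrative sketch (not part of this interface): consulting the strategy
// documented above before expanding a vector-predicated call. Assumes a
// TargetTransformInfo &TTI and a VPIntrinsic &VPI; the query used is the
// getVPLegalizationStrategy member of upstream TargetTransformInfo.
TargetTransformInfo::VPLegalization VPLeg =
    TTI.getVPLegalizationStrategy(VPI);
if (!VPLeg.shouldDoNothing()) {
  // The target wants the %evl and/or the operator rewritten, e.g. by a pass
  // such as ExpandVectorPredication.
}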
1759
1760 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1761 /// state.
1762 ///
1763 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1764 /// node containing a jump table in a format suitable for the target, so it
1765 /// needs to know what format of jump table it can legally use.
1766 ///
1767 /// For non-Arm targets, this function isn't used. It defaults to returning
1768 /// false, but it shouldn't matter what it returns anyway.
1769 bool hasArmWideBranch(bool Thumb) const;
1770
1771 /// \return The maximum number of function arguments the target supports.
1772 unsigned getMaxNumArgs() const;
1773
1774 /// @}
1775
1776private:
1777 /// The abstract base class used to type erase specific TTI
1778 /// implementations.
1779 class Concept;
1780
1781 /// The template model for the base class which wraps a concrete
1782 /// implementation in a type erased interface.
1783 template <typename T> class Model;
1784
1785 std::unique_ptr<Concept> TTIImpl;
1786};
1787
1789public:
1790 virtual ~Concept() = 0;
1791 virtual const DataLayout &getDataLayout() const = 0;
1792 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1794 Type *AccessType,
1796 virtual InstructionCost
1798 const TTI::PointersChainInfo &Info, Type *AccessTy,
1800 virtual unsigned getInliningThresholdMultiplier() const = 0;
1802 virtual unsigned
1804 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1805 virtual int getInlinerVectorBonusPercent() const = 0;
1806 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1807 const AllocaInst *AI) const = 0;
1810 virtual unsigned
1812 ProfileSummaryInfo *PSI,
1813 BlockFrequencyInfo *BFI) = 0;
1818 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1819 virtual bool isSourceOfDivergence(const Value *V) = 0;
1820 virtual bool isAlwaysUniform(const Value *V) = 0;
1821 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1822 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1823 virtual unsigned getFlatAddressSpace() = 0;
1825 Intrinsic::ID IID) const = 0;
1826 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1827 virtual bool
1829 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1830 virtual bool isSingleThreaded() const = 0;
1831 virtual std::pair<const Value *, unsigned>
1832 getPredicatedAddrSpace(const Value *V) const = 0;
1834 Value *OldV,
1835 Value *NewV) const = 0;
1836 virtual bool isLoweredToCall(const Function *F) = 0;
1839 OptimizationRemarkEmitter *ORE) = 0;
1841 PeelingPreferences &PP) = 0;
1843 AssumptionCache &AC,
1844 TargetLibraryInfo *LibInfo,
1845 HardwareLoopInfo &HWLoopInfo) = 0;
1847 virtual TailFoldingStyle
1848 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1849 virtual std::optional<Instruction *> instCombineIntrinsic(
1850 InstCombiner &IC, IntrinsicInst &II) = 0;
1851 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1852 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1853 KnownBits & Known, bool &KnownBitsComputed) = 0;
1854 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1855 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1856 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1857 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1858 SimplifyAndSetOp) = 0;
1859 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1860 virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
1861 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1862 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1863 int64_t BaseOffset, bool HasBaseReg,
1864 int64_t Scale, unsigned AddrSpace,
1865 Instruction *I,
1866 int64_t ScalableOffset) = 0;
1868 const TargetTransformInfo::LSRCost &C2) = 0;
1869 virtual bool isNumRegsMajorCostOfLSR() = 0;
1873 virtual bool canMacroFuseCmp() = 0;
1874 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1876 TargetLibraryInfo *LibInfo) = 0;
1877 virtual AddressingModeKind
1879 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1880 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1881 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1882 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1883 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1884 ElementCount NumElements) const = 0;
1885 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1886 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1888 Align Alignment) = 0;
1890 Align Alignment) = 0;
1891 virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) = 0;
1892 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) = 0;
1893 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0;
1894 virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) = 0;
1895 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1896 unsigned Opcode1,
1897 const SmallBitVector &OpcodeMask) const = 0;
1898 virtual bool enableOrderedReductions() = 0;
1899 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1900 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1903 StackOffset BaseOffset,
1904 bool HasBaseReg, int64_t Scale,
1905 unsigned AddrSpace) = 0;
1906 virtual bool LSRWithInstrQueries() = 0;
1907 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1909 virtual bool useAA() = 0;
1910 virtual bool isTypeLegal(Type *Ty) = 0;
1911 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1912 virtual bool shouldBuildLookupTables() = 0;
1914 virtual bool shouldBuildRelLookupTables() = 0;
1915 virtual bool useColdCCForColdCall(Function &F) = 0;
1917 const APInt &DemandedElts,
1918 bool Insert, bool Extract,
1920 virtual InstructionCost
1922 ArrayRef<Type *> Tys,
1925 virtual bool supportsTailCalls() = 0;
1926 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1927 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1929 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1930 virtual bool enableSelectOptimize() = 0;
1936 unsigned BitWidth,
1937 unsigned AddressSpace,
1938 Align Alignment,
1939 unsigned *Fast) = 0;
1940 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1941 virtual bool haveFastSqrt(Type *Ty) = 0;
1943 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1945 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1946 const APInt &Imm, Type *Ty) = 0;
1947 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1949 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1950 const APInt &Imm, Type *Ty,
1952 Instruction *Inst = nullptr) = 0;
1954 const APInt &Imm, Type *Ty,
1957 const Function &Fn) const = 0;
1958 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1959 virtual unsigned getRegisterClassForType(bool Vector,
1960 Type *Ty = nullptr) const = 0;
1961 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1963 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1964 virtual std::optional<unsigned> getMaxVScale() const = 0;
1965 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1966 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1967 virtual bool
1969 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1970 bool IsScalable) const = 0;
1971 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1972 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1973 Type *ScalarValTy) const = 0;
1975 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1976 virtual unsigned getCacheLineSize() const = 0;
1977 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1978 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1979 const = 0;
1980 virtual std::optional<unsigned> getMinPageSize() const = 0;
1981
1982 /// \return How much before a load we should place the prefetch
1983 /// instruction. This is currently measured in number of
1984 /// instructions.
1985 virtual unsigned getPrefetchDistance() const = 0;
1986
1987 /// \return Some HW prefetchers can handle accesses up to a certain
1988 /// constant stride. This is the minimum stride in bytes where it
1989 /// makes sense to start adding SW prefetches. The default is 1,
1990 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1991 /// even below the HW prefetcher limit, and the arguments provided are
1992 /// meant to serve as a basis for deciding this for a particular loop.
1993 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1994 unsigned NumStridedMemAccesses,
1995 unsigned NumPrefetches,
1996 bool HasCall) const = 0;
1997
1998 /// \return The maximum number of iterations to prefetch ahead. If
1999 /// the required number of iterations is more than this number, no
2000 /// prefetching is performed.
2001 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
2002
2003 /// \return True if prefetching should also be done for writes.
2004 virtual bool enableWritePrefetching() const = 0;
2005
2006 /// \return True if the target wants to issue a prefetch in address space \p AS.
2007 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
2008
2009 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
2011 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2012 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2013 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
2015 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2016 const SmallBitVector &OpcodeMask,
2018
2019 virtual InstructionCost
2022 ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
2023 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
2024 Type *Src, CastContextHint CCH,
2026 const Instruction *I) = 0;
2027 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2028 VectorType *VecTy,
2029 unsigned Index) = 0;
2030 virtual InstructionCost getCFInstrCost(unsigned Opcode,
2032 const Instruction *I = nullptr) = 0;
2033 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
2034 Type *CondTy,
2035 CmpInst::Predicate VecPred,
2037 const Instruction *I) = 0;
2038 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2040 unsigned Index, Value *Op0,
2041 Value *Op1) = 0;
2044 unsigned Index) = 0;
2045
2046 virtual InstructionCost
2047 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2048 const APInt &DemandedDstElts,
2050
2051 virtual InstructionCost
2052 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2054 OperandValueInfo OpInfo, const Instruction *I) = 0;
2055 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2056 Align Alignment,
2057 unsigned AddressSpace,
2059 const Instruction *I) = 0;
2060 virtual InstructionCost
2061 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2062 unsigned AddressSpace,
2064 virtual InstructionCost
2065 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2066 bool VariableMask, Align Alignment,
2068 const Instruction *I = nullptr) = 0;
2069 virtual InstructionCost
2070 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2071 bool VariableMask, Align Alignment,
2073 const Instruction *I = nullptr) = 0;
2074
2076 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2077 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2078 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2079 virtual InstructionCost
2081 std::optional<FastMathFlags> FMF,
2083 virtual InstructionCost
2087 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2088 FastMathFlags FMF,
2091 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2093 virtual InstructionCost
2097 ArrayRef<Type *> Tys,
2099 virtual unsigned getNumberOfParts(Type *Tp) = 0;
2100 virtual InstructionCost
2102 virtual InstructionCost
2105 MemIntrinsicInfo &Info) = 0;
2106 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2108 Type *ExpectedType) = 0;
2110 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2111 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2112 std::optional<uint32_t> AtomicElementSize) const = 0;
2113
2115 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2116 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2117 unsigned SrcAlign, unsigned DestAlign,
2118 std::optional<uint32_t> AtomicCpySize) const = 0;
2119 virtual bool areInlineCompatible(const Function *Caller,
2120 const Function *Callee) const = 0;
2121 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2122 unsigned DefaultCallPenalty) const = 0;
2123 virtual bool areTypesABICompatible(const Function *Caller,
2124 const Function *Callee,
2125 const ArrayRef<Type *> &Types) const = 0;
2126 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2127 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2128 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2129 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2130 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2131 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2132 Align Alignment,
2133 unsigned AddrSpace) const = 0;
2134 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2135 Align Alignment,
2136 unsigned AddrSpace) const = 0;
2138 ElementCount VF) const = 0;
2139 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2140 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2141 unsigned ChainSizeInBytes,
2142 VectorType *VecTy) const = 0;
2143 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2144 unsigned ChainSizeInBytes,
2145 VectorType *VecTy) const = 0;
2146 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2147 ReductionFlags) const = 0;
2148 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2149 ReductionFlags) const = 0;
2150 virtual bool preferEpilogueVectorization() const = 0;
2151
2152 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2153 virtual unsigned getGISelRematGlobalCost() const = 0;
2154 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2155 virtual bool enableScalableVectorization() const = 0;
2156 virtual bool supportsScalableVectors() const = 0;
2157 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2158 Align Alignment) const = 0;
2159 virtual VPLegalization
2161 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2162 virtual unsigned getMaxNumArgs() const = 0;
2163};
2164
2165template <typename T>
2166class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2167 T Impl;
2168
2169public:
2170 Model(T Impl) : Impl(std::move(Impl)) {}
2171 ~Model() override = default;
2172
2173 const DataLayout &getDataLayout() const override {
2174 return Impl.getDataLayout();
2175 }
2176
2177 InstructionCost
2178 getGEPCost(Type *PointeeType, const Value *Ptr,
2179 ArrayRef<const Value *> Operands, Type *AccessType,
2181 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2182 }
2183 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2184 const Value *Base,
2185 const PointersChainInfo &Info,
2186 Type *AccessTy,
2187 TargetCostKind CostKind) override {
2188 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2189 }
2190 unsigned getInliningThresholdMultiplier() const override {
2191 return Impl.getInliningThresholdMultiplier();
2192 }
2193 unsigned adjustInliningThreshold(const CallBase *CB) override {
2194 return Impl.adjustInliningThreshold(CB);
2195 }
2196 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2197 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2198 }
2199 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2200 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2201 }
2202 int getInlinerVectorBonusPercent() const override {
2203 return Impl.getInlinerVectorBonusPercent();
2204 }
2205 unsigned getCallerAllocaCost(const CallBase *CB,
2206 const AllocaInst *AI) const override {
2207 return Impl.getCallerAllocaCost(CB, AI);
2208 }
2209 InstructionCost getMemcpyCost(const Instruction *I) override {
2210 return Impl.getMemcpyCost(I);
2211 }
2212
2213 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2214 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2215 }
2216
2217 InstructionCost getInstructionCost(const User *U,
2218 ArrayRef<const Value *> Operands,
2219 TargetCostKind CostKind) override {
2220 return Impl.getInstructionCost(U, Operands, CostKind);
2221 }
2222 BranchProbability getPredictableBranchThreshold() override {
2223 return Impl.getPredictableBranchThreshold();
2224 }
2225 bool hasBranchDivergence(const Function *F = nullptr) override {
2226 return Impl.hasBranchDivergence(F);
2227 }
2228 bool isSourceOfDivergence(const Value *V) override {
2229 return Impl.isSourceOfDivergence(V);
2230 }
2231
2232 bool isAlwaysUniform(const Value *V) override {
2233 return Impl.isAlwaysUniform(V);
2234 }
2235
2236 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2237 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2238 }
2239
2240 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2241 return Impl.addrspacesMayAlias(AS0, AS1);
2242 }
2243
2244 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2245
2246 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2247 Intrinsic::ID IID) const override {
2248 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2249 }
2250
2251 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2252 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2253 }
2254
2255 bool
2256 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2257 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2258 }
2259
2260 unsigned getAssumedAddrSpace(const Value *V) const override {
2261 return Impl.getAssumedAddrSpace(V);
2262 }
2263
2264 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2265
2266 std::pair<const Value *, unsigned>
2267 getPredicatedAddrSpace(const Value *V) const override {
2268 return Impl.getPredicatedAddrSpace(V);
2269 }
2270
2271 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2272 Value *NewV) const override {
2273 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2274 }
2275
2276 bool isLoweredToCall(const Function *F) override {
2277 return Impl.isLoweredToCall(F);
2278 }
2279 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2280 UnrollingPreferences &UP,
2281 OptimizationRemarkEmitter *ORE) override {
2282 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2283 }
2284 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2285 PeelingPreferences &PP) override {
2286 return Impl.getPeelingPreferences(L, SE, PP);
2287 }
2288 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2289 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2290 HardwareLoopInfo &HWLoopInfo) override {
2291 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2292 }
2293 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2294 return Impl.preferPredicateOverEpilogue(TFI);
2295 }
2297 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2298 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2299 }
2300 std::optional<Instruction *>
2301 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2302 return Impl.instCombineIntrinsic(IC, II);
2303 }
2304 std::optional<Value *>
2305 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2306 APInt DemandedMask, KnownBits &Known,
2307 bool &KnownBitsComputed) override {
2308 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2309 KnownBitsComputed);
2310 }
2311 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2312 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2313 APInt &UndefElts2, APInt &UndefElts3,
2314 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2315 SimplifyAndSetOp) override {
2316 return Impl.simplifyDemandedVectorEltsIntrinsic(
2317 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2318 SimplifyAndSetOp);
2319 }
2320 bool isLegalAddImmediate(int64_t Imm) override {
2321 return Impl.isLegalAddImmediate(Imm);
2322 }
2323 bool isLegalAddScalableImmediate(int64_t Imm) override {
2324 return Impl.isLegalAddScalableImmediate(Imm);
2325 }
2326 bool isLegalICmpImmediate(int64_t Imm) override {
2327 return Impl.isLegalICmpImmediate(Imm);
2328 }
2329 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2330 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2331 Instruction *I, int64_t ScalableOffset) override {
2332 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2333 AddrSpace, I, ScalableOffset);
2334 }
2335 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2336 const TargetTransformInfo::LSRCost &C2) override {
2337 return Impl.isLSRCostLess(C1, C2);
2338 }
2339 bool isNumRegsMajorCostOfLSR() override {
2340 return Impl.isNumRegsMajorCostOfLSR();
2341 }
2342 bool shouldFoldTerminatingConditionAfterLSR() const override {
2343 return Impl.shouldFoldTerminatingConditionAfterLSR();
2344 }
2345 bool shouldDropLSRSolutionIfLessProfitable() const override {
2346 return Impl.shouldDropLSRSolutionIfLessProfitable();
2347 }
2348 bool isProfitableLSRChainElement(Instruction *I) override {
2349 return Impl.isProfitableLSRChainElement(I);
2350 }
2351 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2352 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2353 DominatorTree *DT, AssumptionCache *AC,
2354 TargetLibraryInfo *LibInfo) override {
2355 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2356 }
2358 getPreferredAddressingMode(const Loop *L,
2359 ScalarEvolution *SE) const override {
2360 return Impl.getPreferredAddressingMode(L, SE);
2361 }
2362 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2363 return Impl.isLegalMaskedStore(DataType, Alignment);
2364 }
2365 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2366 return Impl.isLegalMaskedLoad(DataType, Alignment);
2367 }
2368 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2369 return Impl.isLegalNTStore(DataType, Alignment);
2370 }
2371 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2372 return Impl.isLegalNTLoad(DataType, Alignment);
2373 }
2374 bool isLegalBroadcastLoad(Type *ElementTy,
2375 ElementCount NumElements) const override {
2376 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2377 }
2378 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2379 return Impl.isLegalMaskedScatter(DataType, Alignment);
2380 }
2381 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2382 return Impl.isLegalMaskedGather(DataType, Alignment);
2383 }
2384 bool forceScalarizeMaskedGather(VectorType *DataType,
2385 Align Alignment) override {
2386 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2387 }
2388 bool forceScalarizeMaskedScatter(VectorType *DataType,
2389 Align Alignment) override {
2390 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2391 }
2392 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
2393 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2394 }
2395 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
2396 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2397 }
2398 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
2399 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2400 }
2401 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
2402 return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
2403 }
2404 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2405 const SmallBitVector &OpcodeMask) const override {
2406 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2407 }
2408 bool enableOrderedReductions() override {
2409 return Impl.enableOrderedReductions();
2410 }
2411 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2412 return Impl.hasDivRemOp(DataType, IsSigned);
2413 }
2414 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2415 return Impl.hasVolatileVariant(I, AddrSpace);
2416 }
2417 bool prefersVectorizedAddressing() override {
2418 return Impl.prefersVectorizedAddressing();
2419 }
2420 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2421 StackOffset BaseOffset, bool HasBaseReg,
2422 int64_t Scale,
2423 unsigned AddrSpace) override {
2424 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2425 AddrSpace);
2426 }
2427 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2428 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2429 return Impl.isTruncateFree(Ty1, Ty2);
2430 }
2431 bool isProfitableToHoist(Instruction *I) override {
2432 return Impl.isProfitableToHoist(I);
2433 }
2434 bool useAA() override { return Impl.useAA(); }
2435 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2436 unsigned getRegUsageForType(Type *Ty) override {
2437 return Impl.getRegUsageForType(Ty);
2438 }
2439 bool shouldBuildLookupTables() override {
2440 return Impl.shouldBuildLookupTables();
2441 }
2442 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2443 return Impl.shouldBuildLookupTablesForConstant(C);
2444 }
2445 bool shouldBuildRelLookupTables() override {
2446 return Impl.shouldBuildRelLookupTables();
2447 }
2448 bool useColdCCForColdCall(Function &F) override {
2449 return Impl.useColdCCForColdCall(F);
2450 }
2451
2452 InstructionCost getScalarizationOverhead(VectorType *Ty,
2453 const APInt &DemandedElts,
2454 bool Insert, bool Extract,
2455 TargetCostKind CostKind) override {
2456 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2457 CostKind);
2458 }
2459 InstructionCost
2460 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2461 ArrayRef<Type *> Tys,
2462 TargetCostKind CostKind) override {
2463 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2464 }
2465
2466 bool supportsEfficientVectorElementLoadStore() override {
2467 return Impl.supportsEfficientVectorElementLoadStore();
2468 }
2469
2470 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2471 bool supportsTailCallFor(const CallBase *CB) override {
2472 return Impl.supportsTailCallFor(CB);
2473 }
2474
2475 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2476 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2477 }
2478 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2479 bool IsZeroCmp) const override {
2480 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2481 }
2482 bool enableSelectOptimize() override {
2483 return Impl.enableSelectOptimize();
2484 }
2485 bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
2486 return Impl.shouldTreatInstructionLikeSelect(I);
2487 }
2488 bool enableInterleavedAccessVectorization() override {
2489 return Impl.enableInterleavedAccessVectorization();
2490 }
2491 bool enableMaskedInterleavedAccessVectorization() override {
2492 return Impl.enableMaskedInterleavedAccessVectorization();
2493 }
2494 bool isFPVectorizationPotentiallyUnsafe() override {
2495 return Impl.isFPVectorizationPotentiallyUnsafe();
2496 }
2497 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2498 unsigned AddressSpace, Align Alignment,
2499 unsigned *Fast) override {
2500 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2501 Alignment, Fast);
2502 }
2503 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2504 return Impl.getPopcntSupport(IntTyWidthInBit);
2505 }
2506 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2507
2508 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2509 return Impl.isExpensiveToSpeculativelyExecute(I);
2510 }
2511
2512 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2513 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2514 }
2515
2516 InstructionCost getFPOpCost(Type *Ty) override {
2517 return Impl.getFPOpCost(Ty);
2518 }
2519
2520 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2521 const APInt &Imm, Type *Ty) override {
2522 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2523 }
2524 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2525 TargetCostKind CostKind) override {
2526 return Impl.getIntImmCost(Imm, Ty, CostKind);
2527 }
2528 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2529 const APInt &Imm, Type *Ty,
2531 Instruction *Inst = nullptr) override {
2532 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2533 }
2534 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2535 const APInt &Imm, Type *Ty,
2536 TargetCostKind CostKind) override {
2537 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2538 }
2539 bool preferToKeepConstantsAttached(const Instruction &Inst,
2540 const Function &Fn) const override {
2541 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2542 }
2543 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2544 return Impl.getNumberOfRegisters(ClassID);
2545 }
2546 unsigned getRegisterClassForType(bool Vector,
2547 Type *Ty = nullptr) const override {
2548 return Impl.getRegisterClassForType(Vector, Ty);
2549 }
2550 const char *getRegisterClassName(unsigned ClassID) const override {
2551 return Impl.getRegisterClassName(ClassID);
2552 }
2553 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2554 return Impl.getRegisterBitWidth(K);
2555 }
2556 unsigned getMinVectorRegisterBitWidth() const override {
2557 return Impl.getMinVectorRegisterBitWidth();
2558 }
2559 std::optional<unsigned> getMaxVScale() const override {
2560 return Impl.getMaxVScale();
2561 }
2562 std::optional<unsigned> getVScaleForTuning() const override {
2563 return Impl.getVScaleForTuning();
2564 }
2565 bool isVScaleKnownToBeAPowerOfTwo() const override {
2566 return Impl.isVScaleKnownToBeAPowerOfTwo();
2567 }
2568 bool shouldMaximizeVectorBandwidth(
2569 TargetTransformInfo::RegisterKind K) const override {
2570 return Impl.shouldMaximizeVectorBandwidth(K);
2571 }
2572 ElementCount getMinimumVF(unsigned ElemWidth,
2573 bool IsScalable) const override {
2574 return Impl.getMinimumVF(ElemWidth, IsScalable);
2575 }
2576 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2577 return Impl.getMaximumVF(ElemWidth, Opcode);
2578 }
2579 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2580 Type *ScalarValTy) const override {
2581 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2582 }
2583 bool shouldConsiderAddressTypePromotion(
2584 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2585 return Impl.shouldConsiderAddressTypePromotion(
2586 I, AllowPromotionWithoutCommonHeader);
2587 }
2588 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2589 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2590 return Impl.getCacheSize(Level);
2591 }
2592 std::optional<unsigned>
2593 getCacheAssociativity(CacheLevel Level) const override {
2594 return Impl.getCacheAssociativity(Level);
2595 }
2596
2597 std::optional<unsigned> getMinPageSize() const override {
2598 return Impl.getMinPageSize();
2599 }
2600
2601 /// Return the preferred prefetch distance in terms of instructions.
2602 ///
2603 unsigned getPrefetchDistance() const override {
2604 return Impl.getPrefetchDistance();
2605 }
2606
2607 /// Return the minimum stride necessary to trigger software
2608 /// prefetching.
2609 ///
2610 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2611 unsigned NumStridedMemAccesses,
2612 unsigned NumPrefetches,
2613 bool HasCall) const override {
2614 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2615 NumPrefetches, HasCall);
2616 }
2617
2618 /// Return the maximum prefetch distance in terms of loop
2619 /// iterations.
2620 ///
2621 unsigned getMaxPrefetchIterationsAhead() const override {
2622 return Impl.getMaxPrefetchIterationsAhead();
2623 }
2624
2625 /// \return True if prefetching should also be done for writes.
2626 bool enableWritePrefetching() const override {
2627 return Impl.enableWritePrefetching();
2628 }
2629
2630 /// \return True if the target wants to issue a prefetch in address space \p AS.
2631 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2632 return Impl.shouldPrefetchAddressSpace(AS);
2633 }
2634
2635 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2636 return Impl.getMaxInterleaveFactor(VF);
2637 }
2638 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2639 unsigned &JTSize,
2640 ProfileSummaryInfo *PSI,
2641 BlockFrequencyInfo *BFI) override {
2642 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2643 }
2644 InstructionCost getArithmeticInstrCost(
2645 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2646 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2647 ArrayRef<const Value *> Args,
2648 const Instruction *CxtI = nullptr) override {
2649 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2650 Args, CxtI);
2651 }
2652 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2653 unsigned Opcode1,
2654 const SmallBitVector &OpcodeMask,
2655 TTI::TargetCostKind CostKind) const override {
2656 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2657 }
2658
2659 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2660 ArrayRef<int> Mask,
2662 VectorType *SubTp,
2663 ArrayRef<const Value *> Args,
2664 const Instruction *CxtI) override {
2665 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
2666 CxtI);
2667 }
2668 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2669 CastContextHint CCH,
2671 const Instruction *I) override {
2672 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2673 }
2674 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2675 VectorType *VecTy,
2676 unsigned Index) override {
2677 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2678 }
2679 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2680 const Instruction *I = nullptr) override {
2681 return Impl.getCFInstrCost(Opcode, CostKind, I);
2682 }
2683 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2684 CmpInst::Predicate VecPred,
2686 const Instruction *I) override {
2687 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2688 }
2689 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2691 unsigned Index, Value *Op0,
2692 Value *Op1) override {
2693 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2694 }
2695 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2697 unsigned Index) override {
2698 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2699 }
2700 InstructionCost
2701 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2702 const APInt &DemandedDstElts,
2703 TTI::TargetCostKind CostKind) override {
2704 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2705 DemandedDstElts, CostKind);
2706 }
2707 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2708 unsigned AddressSpace,
2710 OperandValueInfo OpInfo,
2711 const Instruction *I) override {
2712 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2713 OpInfo, I);
2714 }
2715 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2716 unsigned AddressSpace,
2718 const Instruction *I) override {
2719 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2720 CostKind, I);
2721 }
2722 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2723 Align Alignment, unsigned AddressSpace,
2724 TTI::TargetCostKind CostKind) override {
2725 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2726 CostKind);
2727 }
2728 InstructionCost
2729 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2730 bool VariableMask, Align Alignment,
2732 const Instruction *I = nullptr) override {
2733 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2734 Alignment, CostKind, I);
2735 }
2736 InstructionCost
2737 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2738 bool VariableMask, Align Alignment,
2740 const Instruction *I = nullptr) override {
2741 return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
2742 Alignment, CostKind, I);
2743 }
2744 InstructionCost getInterleavedMemoryOpCost(
2745 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2746 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2747 bool UseMaskForCond, bool UseMaskForGaps) override {
2748 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2749 Alignment, AddressSpace, CostKind,
2750 UseMaskForCond, UseMaskForGaps);
2751 }
2752 InstructionCost
2753 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2754 std::optional<FastMathFlags> FMF,
2755 TTI::TargetCostKind CostKind) override {
2756 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2757 }
2758 InstructionCost
2759 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2760 TTI::TargetCostKind CostKind) override {
2761 return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
2762 }
2763 InstructionCost
2764 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2765 VectorType *Ty, FastMathFlags FMF,
2766 TTI::TargetCostKind CostKind) override {
2767 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2768 CostKind);
2769 }
2770 InstructionCost
2771 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2772 TTI::TargetCostKind CostKind) override {
2773 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2774 }
2775 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2776 TTI::TargetCostKind CostKind) override {
2777 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2778 }
2779 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2780 ArrayRef<Type *> Tys,
2781 TTI::TargetCostKind CostKind) override {
2782 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2783 }
2784 unsigned getNumberOfParts(Type *Tp) override {
2785 return Impl.getNumberOfParts(Tp);
2786 }
2787 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2788 const SCEV *Ptr) override {
2789 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2790 }
2791 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2792 return Impl.getCostOfKeepingLiveOverCall(Tys);
2793 }
2794 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2795 MemIntrinsicInfo &Info) override {
2796 return Impl.getTgtMemIntrinsic(Inst, Info);
2797 }
2798 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2799 return Impl.getAtomicMemIntrinsicMaxElementSize();
2800 }
2801 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2802 Type *ExpectedType) override {
2803 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2804 }
2805 Type *getMemcpyLoopLoweringType(
2806 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2807 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2808 std::optional<uint32_t> AtomicElementSize) const override {
2809 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2810 DestAddrSpace, SrcAlign, DestAlign,
2811 AtomicElementSize);
2812 }
2813 void getMemcpyLoopResidualLoweringType(
2814 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2815 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2816 unsigned SrcAlign, unsigned DestAlign,
2817 std::optional<uint32_t> AtomicCpySize) const override {
2818 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2819 SrcAddrSpace, DestAddrSpace,
2820 SrcAlign, DestAlign, AtomicCpySize);
2821 }
2822 bool areInlineCompatible(const Function *Caller,
2823 const Function *Callee) const override {
2824 return Impl.areInlineCompatible(Caller, Callee);
2825 }
2826 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2827 unsigned DefaultCallPenalty) const override {
2828 return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
2829 }
2830 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2831 const ArrayRef<Type *> &Types) const override {
2832 return Impl.areTypesABICompatible(Caller, Callee, Types);
2833 }
2834 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2835 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2836 }
2837 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2838 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2839 }
2840 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2841 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2842 }
2843 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2844 return Impl.isLegalToVectorizeLoad(LI);
2845 }
2846 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2847 return Impl.isLegalToVectorizeStore(SI);
2848 }
2849 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2850 unsigned AddrSpace) const override {
2851 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2852 AddrSpace);
2853 }
2854 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2855 unsigned AddrSpace) const override {
2856 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2857 AddrSpace);
2858 }
2859 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2860 ElementCount VF) const override {
2861 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2862 }
2863 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2864 return Impl.isElementTypeLegalForScalableVector(Ty);
2865 }
2866 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2867 unsigned ChainSizeInBytes,
2868 VectorType *VecTy) const override {
2869 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2870 }
2871 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2872 unsigned ChainSizeInBytes,
2873 VectorType *VecTy) const override {
2874 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2875 }
2876 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2877 ReductionFlags Flags) const override {
2878 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2879 }
2880 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2881 ReductionFlags Flags) const override {
2882 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2883 }
2884 bool preferEpilogueVectorization() const override {
2885 return Impl.preferEpilogueVectorization();
2886 }
2887
2888 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2889 return Impl.shouldExpandReduction(II);
2890 }
2891
2892 unsigned getGISelRematGlobalCost() const override {
2893 return Impl.getGISelRematGlobalCost();
2894 }
2895
2896 unsigned getMinTripCountTailFoldingThreshold() const override {
2897 return Impl.getMinTripCountTailFoldingThreshold();
2898 }
2899
2900 bool supportsScalableVectors() const override {
2901 return Impl.supportsScalableVectors();
2902 }
2903
2904 bool enableScalableVectorization() const override {
2905 return Impl.enableScalableVectorization();
2906 }
2907
2908 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2909 Align Alignment) const override {
2910 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2911 }
2912
2913 VPLegalization
2914 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2915 return Impl.getVPLegalizationStrategy(PI);
2916 }
2917
2918 bool hasArmWideBranch(bool Thumb) const override {
2919 return Impl.hasArmWideBranch(Thumb);
2920 }
2921
2922 unsigned getMaxNumArgs() const override {
2923 return Impl.getMaxNumArgs();
2924 }
2925};
2926
2927template <typename T>
2928 TargetTransformInfo::TargetTransformInfo(T Impl)
2929 : TTIImpl(new Model<T>(Impl)) {}
2930
2931/// Analysis pass providing the \c TargetTransformInfo.
2932///
2933/// The core idea of the TargetIRAnalysis is to expose an interface through
2934/// which LLVM targets can analyze and provide information about the middle
2935/// end's target-independent IR. This supports use cases such as target-aware
2936/// cost modeling of IR constructs.
2937///
2938/// This is a function analysis because much of the cost modeling for targets
2939 /// is done in a subtarget-specific way and LLVM supports compiling different
2940/// functions targeting different subtargets in order to support runtime
2941/// dispatch according to the observed subtarget.
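/// As a usage sketch (illustrative; it assumes a FunctionAnalysisManager FAM
/// and a Function F are in scope):
/// \code
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   if (TTI.supportsScalableVectors()) {
///     // ... make a target-aware decision ...
///   }
/// \endcode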
2942class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2943public:
2944 typedef TargetTransformInfo Result;
2945
2946 /// Default construct a target IR analysis.
2947 ///
2948 /// This will use the module's datalayout to construct a baseline
2949 /// conservative TTI result.
2950 TargetIRAnalysis();
2951
2952 /// Construct an IR analysis pass around a target-provided callback.
2953 ///
2954 /// The callback will be called with a particular function for which the TTI
2955 /// is needed and must return a TTI object for that function.
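/// A minimal sketch, assuming a TargetMachine *TM that outlives the analysis:
/// \code
///   TargetIRAnalysis TIRA(
///       [TM](const Function &F) { return TM->getTargetTransformInfo(F); });
/// \endcode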
2956 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2957
2958 // Value semantics. We spell out the constructors for MSVC.
2959 TargetIRAnalysis(const TargetIRAnalysis &Arg)
2960 : TTICallback(Arg.TTICallback) {}
2961 TargetIRAnalysis(TargetIRAnalysis &&Arg)
2962 : TTICallback(std::move(Arg.TTICallback)) {}
2963 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2964 TTICallback = RHS.TTICallback;
2965 return *this;
2966 }
2967 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2968 TTICallback = std::move(RHS.TTICallback);
2969 return *this;
2970 }
2971
2972 Result run(const Function &F, FunctionAnalysisManager &);
2973
2974 private:
2975 friend AnalysisInfoMixin<TargetIRAnalysis>;
2976 static AnalysisKey Key;
2977
2978 /// The callback used to produce a result.
2979 ///
2980 /// We use a completely opaque callback so that targets can provide whatever
2981 /// mechanism they desire for constructing the TTI for a given function.
2982 ///
2983 /// FIXME: Should we really use std::function? It's relatively inefficient.
2984 /// It might be possible to arrange for even stateful callbacks to outlive
2985 /// the analysis and thus use a function_ref which would be lighter weight.
2986 /// This may also be less error-prone, as the callback is likely to reference
2987 /// the external TargetMachine, and that reference must never dangle.
2988 std::function<Result(const Function &)> TTICallback;
2989
2990 /// Helper function used as the callback in the default constructor.
2991 static Result getDefaultTTI(const Function &F);
2992};
2993
2994/// Wrapper pass for TargetTransformInfo.
2995///
2996/// This pass can be constructed from a TTI object which it stores internally
2997/// and is queried by passes.
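/// A minimal sketch of legacy-pass-manager usage, assuming a Function &F
/// inside a pass's runOnFunction (the pass would also need to declare
/// AU.addRequired<TargetTransformInfoWrapperPass>() in getAnalysisUsage):
/// \code
///   TargetTransformInfo &TTI =
///       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
/// \endcode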
2998 class TargetTransformInfoWrapperPass : public ImmutablePass {
2999 TargetIRAnalysis TIRA;
3000 std::optional<TargetTransformInfo> TTI;
3001
3002 virtual void anchor();
3003
3004public:
3005 static char ID;
3006
3007 /// We must provide a default constructor for the pass, but it should
3008 /// never be used.
3009 ///
3010 /// Use the constructor below or call one of the creation routines.
3011 TargetTransformInfoWrapperPass();
3012
3013 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
3014
3015 TargetTransformInfo &getTTI(const Function &F);
3016};
3017
3018/// Create an analysis pass wrapper around a TTI object.
3019///
3020/// This analysis pass just holds the TTI instance and makes it available to
3021/// clients.
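/// A minimal sketch, assuming a legacy::PassManager PM and a TargetMachine *TM:
/// \code
///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
/// \endcode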
3022 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
3023
3024} // namespace llvm
3025
3026#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMDGPU Lower Kernel Arguments
Atomic ordering constants.
RelocType Type
Definition: COFFYAML.cpp:391
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine InstCombiner
uint64_t IntrinsicInst * II
This header defines various interfaces for pass management in LLVM.
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
This file implements the SmallBitVector class.
Value * RHS
Class for arbitrary precision integers.
Definition: APInt.h:77
an instruction to allocate memory on the stack
Definition: Instructions.h:60
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:360
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:282
The core instruction combiner logic.
Definition: InstCombiner.h:47
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Definition: DerivedTypes.h:40
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:587
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:185
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:318
Multiway switch.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
TargetTransformInfo & getTTI(const Function &F)
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
virtual const DataLayout & getDataLayout() const =0
virtual bool isProfitableLSRChainElement(Instruction *I)=0
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I)=0
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
virtual bool enableOrderedReductions()=0
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual bool shouldPrefetchAddressSpace(unsigned AS) const =0
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
virtual unsigned getMinVectorRegisterBitWidth() const =0
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const =0
virtual std::optional< unsigned > getVScaleForTuning() const =0
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)=0
virtual bool supportsEfficientVectorElementLoadStore()=0
virtual unsigned getRegUsageForType(Type *Ty)=0
virtual bool hasArmWideBranch(bool Thumb) const =0
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind)=0
virtual bool shouldBuildLookupTables()=0
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType)=0
virtual unsigned getGISelRematGlobalCost() const =0
virtual unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const =0
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize) const =0
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
virtual bool supportsTailCallFor(const CallBase *CB)=0
virtual std::optional< unsigned > getMaxVScale() const =0
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
virtual unsigned getMaxNumArgs() const =0
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
virtual bool enableWritePrefetching() const =0
virtual bool useColdCCForColdCall(Function &F)=0
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const =0
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual int getInlinerVectorBonusPercent() const =0
virtual unsigned getMaxPrefetchIterationsAhead() const =0
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual unsigned getCacheLineSize() const =0
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)=0
virtual bool isProfitableToHoist(Instruction *I)=0
virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getFPOpCost(Type *Ty)=0
virtual unsigned getMinTripCountTailFoldingThreshold() const =0
virtual bool enableMaskedInterleavedAccessVectorization()=0
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
virtual bool isTypeLegal(Type *Ty)=0
virtual BranchProbability getPredictableBranchThreshold()=0
virtual bool enableScalableVectorization() const =0
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual const char * getRegisterClassName(unsigned ClassID) const =0
virtual unsigned getMaxInterleaveFactor(ElementCount VF)=0
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
virtual bool haveFastSqrt(Type *Ty)=0
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)=0
virtual std::optional< unsigned > getCacheSize(CacheLevel Level) const =0
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)=0
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
virtual std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
virtual bool supportsScalableVectors() const =0
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
virtual unsigned getNumberOfParts(Type *Tp)=0
virtual bool isLegalICmpImmediate(int64_t Imm)=0
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true)=0
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
virtual bool shouldBuildRelLookupTables()=0
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TargetCostKind CostKind)=0
virtual bool isLoweredToCall(const Function *F)=0
virtual bool isSourceOfDivergence(const Value *V)=0
virtual bool isLegalAddScalableImmediate(int64_t Imm)=0
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const =0
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual bool isFPVectorizationPotentiallyUnsafe()=0
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual bool hasBranchDivergence(const Function *F=nullptr)=0
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual unsigned getInliningThresholdMultiplier() const =0
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)=0
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
virtual bool shouldDropLSRSolutionIfLessProfitable() const =0
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
virtual bool prefersVectorizedAddressing()=0
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const =0
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args, const Instruction *CxtI)=0
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, OperandValueInfo OpInfo, const Instruction *I)=0
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicElementSize) const =0
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
virtual bool isAlwaysUniform(const Value *V)=0
virtual std::optional< unsigned > getMinPageSize() const =0
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const =0
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual unsigned getFlatAddressSpace()=0
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)=0
virtual unsigned getPrefetchDistance() const =0
virtual bool shouldFoldTerminatingConditionAfterLSR() const =0
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I)=0
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const =0
virtual bool isNumRegsMajorCostOfLSR()=0
virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment)=0
virtual bool isSingleThreaded() const =0
virtual bool isLegalAddImmediate(int64_t Imm)=0
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I, int64_t ScalableOffset)=0
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
virtual bool isVScaleKnownToBeAPowerOfTwo() const =0
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
virtual bool enableInterleavedAccessVectorization()=0
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
virtual bool preferEpilogueVectorization() const =0
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast)=0
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const =0
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
bool isLegalToVectorizeLoad(LoadInst *LI) const
std::optional< unsigned > getVScaleForTuning() const
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Return false if a AS0 address cannot possibly alias a AS1 address.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool isLegalToVectorizeStore(StoreInst *SI) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize=std::nullopt) const
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
bool isAlwaysUniform(const Value *V) const
unsigned getAssumedAddrSpace(const Value *V) const
bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
bool shouldFoldTerminatingConditionAfterLSR() const
Return true if LSR should attempts to replace a use of an otherwise dead primary IV in the latch cond...
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instructions unique non-constant operands.
bool supportsTailCallFor(const CallBase *CB) const
If target supports tail call on CB.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
bool isProfitableLSRChainElement(Instruction *I) const
TypeSize getRegisterBitWidth(RegisterKind K) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
std::optional< unsigned > getMaxVScale() const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are t...
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
unsigned getAtomicMemIntrinsicMaxElementSize() const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting...
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
Query the target what the preferred style of tail folding is.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType=nullptr, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
\Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
unsigned getMaxPrefetchIterationsAhead() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
Return true if the target supports strided load.
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and ran.
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
OperandValueProperties
Additional properties of an operand's values.
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const PointersChainInfo &Info, Type *AccessTy, TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Estimate the cost of a chain of pointers (typically pointer operands of a chain of loads or stores wi...
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned adjustInliningThreshold(const CallBase *CB) const
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
unsigned getMinVectorRegisterBitWidth() const
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
It can be advantageous to detach complex constants from their uses to make their generation cheaper.
bool hasArmWideBranch(bool Thumb) const
const char * getRegisterClassName(unsigned ClassID) const
bool preferEpilogueVectorization() const
Return true if the loop vectorizer should consider vectorizing an otherwise scalar epilogue loop.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
PopcntSupportKind
Flags indicating the kind of support for population count.
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
unsigned getInliningThresholdMultiplier() const
unsigned getNumberOfRegisters(unsigned ClassID) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
std::optional< unsigned > getMinPageSize() const
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
std::optional< unsigned > getCacheSize(CacheLevel Level) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Returns the cost estimation for alternating opcode pattern that can be lowered to a single instructio...
TargetCostConstants
Underlying constants for 'cost' values in this interface.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
unsigned getMinTripCountTailFoldingThreshold() const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
unsigned getMaxInterleaveFactor(ElementCount VF) const
bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
unsigned getGISelRematGlobalCost() const
MemIndexedMode
The type of load/store indexing.
@ MIM_PostInc
Post-incrementing.
@ MIM_PostDec
Post-decrementing.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
bool supportsTailCalls() const
If the target supports tail calls.
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
unsigned getNumberOfParts(Type *Tp) const
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicElementSize=std::nullopt) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=std::nullopt, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr) const
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instruction.
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
bool shouldExpandReduction(const IntrinsicInst *II) const
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
Returns the maximum memset / memcpy size in bytes that still makes it profitable to inline the call.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
OperandValueKind
Additional information about an operand's possible values.
CacheLevel
The possible cache levels.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:53
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:456
AddressSpace
Definition: NVPTXBaseInfo.h:21
AtomicOrdering
Atomic ordering for LLVM's memory model.
TargetTransformInfo TTI
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
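A sketch of the legacy-pass-manager setup, assuming a configured TargetMachine *TM; legacy passes then reach the per-function TTI through TargetTransformInfoWrapperPass.
#include "llvm/IR/LegacyPassManager.h"

legacy::PassManager PM;
// Register the wrapper so legacy passes can later call
// getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F).
PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));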
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements it with an icmp instruction.
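A sketch of how a vectorizer might act on the style reported by the target; getPreferredTailFoldingStyle is assumed here to be the TTI hook that returns it in this version, and TTI is assumed to be in scope.
// Choose the predication strategy for the vector loop.
TailFoldingStyle Style =
    TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true);
if (Style == TailFoldingStyle::DataWithEVL) {
  // emit explicit-vector-length (VP) intrinsics for the predicated body
}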
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:97
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Definition: Analysis.h:26
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
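A simplified sketch of how a hardware-loops style transform uses this struct together with TTI::isHardwareLoopProfitable; L, SE, LI, DT, AC, LibInfo and TTI are the usual loop analyses, assumed to be in scope.
HardwareLoopInfo HWLoopInfo(L);
if (HWLoopInfo.canAnalyze(LI) &&
    TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
    HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT)) {
  // ExitCount, CountType and LoopDecrement are now populated and can drive
  // emission of the target's set-loop-iterations / loop-decrement intrinsics.
}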
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
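A sketch of the typical consumer pattern (e.g. in EarlyCSE-style analyses), assuming an IntrinsicInst *II and TTI in scope; TTI::getTgtMemIntrinsic fills the struct when the target recognizes the intrinsic.
MemIntrinsicInfo Info;
if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && Info.isUnordered()) {
  // Treat the intrinsic like an ordinary load from Info.PtrVal for
  // memory-dependence and redundancy-elimination purposes.
  Value *Ptr = Info.PtrVal;
  (void)Ptr;
}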
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
TargetLibraryInfo * TLI
LoopVectorizationLegality * LVL
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before the loop body).
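A sketch of how a target's TTI implementation might fill these fields from getPeelingPreferences (documented above); MyTTIImpl is a hypothetical target implementation class.
void MyTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                      TTI::PeelingPreferences &PP) {
  PP.AllowPeeling = true;            // opt in to peeling on this target
  PP.PeelProfiledIterations = true;  // let profile data guide the decision
  PP.PeelCount = 0;                  // no forced factor for this sketch
}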
Describe known properties for a set of pointers.
unsigned IsKnownStride
True if the distance between any two neighbouring pointers is a known value.
unsigned IsUnitStride
These properties are only valid if IsSameBaseAddress is set.
unsigned IsSameBaseAddress
All the GEPs in a set have same base address.
Flags describing the kind of vector reduction.
bool IsSigned
Whether the operation is a signed int reduction.
bool IsMaxOp
If the op is a min/max kind, true if it's a max operation.
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loop body).
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain percentage to allow more aggressive complete unrolling.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll profitability.
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate compare and branch instructions).
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for runtime unrolling.
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
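A sketch of a target-side override tuning the generic unroller through the fields documented above; MyTTIImpl is a hypothetical target implementation class, and the chosen values are illustrative only.
void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                        TTI::UnrollingPreferences &UP,
                                        OptimizationRemarkEmitter *ORE) {
  UP.Partial = true;          // allow partial unrolling
  UP.Runtime = true;          // allow runtime unrolling
  UP.PartialThreshold = 200;  // hypothetical size budget for partial unroll
  UP.AllowRemainder = true;   // permit a remainder loop after unrolling
}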