LLVM 20.0.0git
TargetTransformInfo.h
Go to the documentation of this file.
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/APInt.h"
25#include "llvm/IR/FMF.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
32#include <functional>
33#include <optional>
34#include <utility>
35
36namespace llvm {
37
38namespace Intrinsic {
39typedef unsigned ID; // Intrinsic identifiers are represented as plain unsigned values.
40} // namespace Intrinsic
41
42class AllocaInst;
43class AssumptionCache;
44class BlockFrequencyInfo;
45class DominatorTree;
46class BranchInst;
47class Function;
48class GlobalValue;
49class InstCombiner;
50class OptimizationRemarkEmitter;
51class InterleavedAccessInfo;
52class IntrinsicInst;
53class LoadInst;
54class Loop;
55class LoopInfo;
56class LoopVectorizationLegality;
57class ProfileSummaryInfo;
58class RecurrenceDescriptor;
59class SCEV;
60class ScalarEvolution;
61class SmallBitVector;
62class StoreInst;
63class SwitchInst;
64class TargetLibraryInfo;
65class Type;
66class VPIntrinsic;
67struct KnownBits;
68
69/// Information about a load/store intrinsic defined by the target.
71 /// This is the pointer that the intrinsic is loading from or storing to.
72 /// If this is non-null, then analysis/optimization passes can assume that
73 /// this intrinsic is functionally equivalent to a load/store from this
74 /// pointer.
75 Value *PtrVal = nullptr;
76
77 // Ordering for atomic operations.
79
80 // Same Id is set by the target for corresponding load/store intrinsics.
81 unsigned short MatchingId = 0;
82
83 bool ReadMem = false;
84 bool WriteMem = false;
85 bool IsVolatile = false;
86
87 bool isUnordered() const {
91 }
92};
93
94/// Attributes of a target dependent hardware loop.
96 HardwareLoopInfo() = delete;
98 Loop *L = nullptr;
101 const SCEV *ExitCount = nullptr;
103 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
104 // value in every iteration.
105 bool IsNestingLegal = false; // Can a hardware loop be a parent to
106 // another hardware loop?
107 bool CounterInReg = false; // Should loop counter be updated in
108 // the loop via a phi?
109 bool PerformEntryTest = false; // Generate the intrinsic which also performs
110 // icmp ne zero on the loop counter value and
111 // produces an i1 to guard the loop entry.
113 DominatorTree &DT, bool ForceNestedLoop = false,
114 bool ForceHardwareLoopPHI = false);
115 bool canAnalyze(LoopInfo &LI);
116};
117
119 const IntrinsicInst *II = nullptr;
120 Type *RetTy = nullptr;
121 Intrinsic::ID IID;
122 SmallVector<Type *, 4> ParamTys;
124 FastMathFlags FMF;
125 // If ScalarizationCost is invalid (the default), the cost of scalarizing the
126 // arguments and the return value will be computed based on types.
127 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
128
129public:
131 Intrinsic::ID Id, const CallBase &CI,
133 bool TypeBasedOnly = false);
134
136 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
137 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
139
142
146 const IntrinsicInst *I = nullptr,
148
149 Intrinsic::ID getID() const { return IID; }
150 const IntrinsicInst *getInst() const { return II; }
151 Type *getReturnType() const { return RetTy; }
152 FastMathFlags getFlags() const { return FMF; }
153 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
155 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
156
157 bool isTypeBasedOnly() const { // \returns true when the Arguments list is empty.
158 return Arguments.empty();
159 }
160
161 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } // True when ScalarizationCost holds a valid cost.
162};
163
165 /// Don't use tail folding
166 None,
167 /// Use predicate only to mask operations on data in the loop.
168 /// When the VL is not known to be a power-of-2, this method requires a
169 /// runtime overflow check for the i + VL in the loop because it compares the
170 /// scalar induction variable against the tripcount rounded up by VL which may
171 /// overflow. When the VL is a power-of-2, both the increment and uprounded
172 /// tripcount will overflow to 0, which does not require a runtime check
173 /// since the loop is exited when the loop induction variable equals the
174 /// uprounded trip-count, which are both 0.
175 Data,
176 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
177 /// calculate the mask and instead implements this with a
178 /// splat/stepvector/cmp.
179 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
180 /// active.lane.mask intrinsic when it is not natively supported?
182 /// Use predicate to control both data and control flow.
183 /// This method always requires a runtime overflow check for the i + VL
184 /// increment inside the loop, because it uses the result directly in the
185 /// active.lane.mask to calculate the mask for the next iteration. If the
186 /// increment overflows, the mask is no longer correct.
188 /// Use predicate to control both data and control flow, but modify
189 /// the trip count so that a runtime overflow check can be avoided
190 /// and such that the scalar epilogue loop can always be removed.
192 /// Use predicated EVL instructions for tail-folding.
193 /// Indicates that VP intrinsics should be used.
195};
196
203 : TLI(TLI), LVL(LVL), IAI(IAI) {}
204};
205
206class TargetTransformInfo;
208
209/// This pass provides access to the codegen interfaces that are needed
210/// for IR-level transformations.
212public:
213 /// Construct a TTI object using a type implementing the \c Concept
214 /// API below.
215 ///
216 /// This is used by targets to construct a TTI wrapping their target-specific
217 /// implementation that encodes appropriate costs for their target.
218 template <typename T> TargetTransformInfo(T Impl);
219
220 /// Construct a baseline TTI object using a minimal implementation of
221 /// the \c Concept API below.
222 ///
223 /// The TTI implementation will reflect the information in the DataLayout
224 /// provided if non-null.
225 explicit TargetTransformInfo(const DataLayout &DL);
226
227 // Provide move semantics.
230
231 // We need to define the destructor out-of-line to define our sub-classes
232 // out-of-line.
234
235 /// Handle the invalidation of this information.
236 ///
237 /// When used as a result of \c TargetIRAnalysis this method will be called
238 /// when the function this was computed for changes. When it returns false,
239 /// the information is preserved across those changes.
242 // FIXME: We should probably in some way ensure that the subtarget
243 // information for a function hasn't changed.
244 return false;
245 }
246
247 /// \name Generic Target Information
248 /// @{
249
250 /// The kind of cost model.
251 ///
252 /// There are several different cost models that can be customized by the
253 /// target. The normalization of each cost model may be target specific.
254 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
255 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
257 TCK_RecipThroughput, ///< Reciprocal throughput.
258 TCK_Latency, ///< The latency of instruction.
259 TCK_CodeSize, ///< Instruction code size.
260 TCK_SizeAndLatency ///< The weighted sum of size and latency.
261 };
262
263 /// Underlying constants for 'cost' values in this interface.
264 ///
265 /// Many APIs in this interface return a cost. This enum defines the
266 /// fundamental values that should be used to interpret (and produce) those
267 /// costs. The costs are returned as an int rather than a member of this
268 /// enumeration because it is expected that the cost of one IR instruction
269 /// may have a multiplicative factor to it or otherwise won't fit directly
270 /// into the enum. Moreover, it is common to sum or average costs which works
271 /// better as simple integral values. Thus this enum only provides constants.
272 /// Also note that the returned costs are signed integers to make it natural
273 /// to add, subtract, and test with zero (a common boundary condition). It is
274 /// not expected that 2^32 is a realistic cost to be modeling at any point.
275 ///
276 /// Note that these costs should usually reflect the intersection of code-size
277 /// cost and execution cost. A free instruction is typically one that folds
278 /// into another instruction. For example, reg-to-reg moves can often be
279 /// skipped by renaming the registers in the CPU, but they still are encoded
280 /// and thus wouldn't be considered 'free' here.
282 TCC_Free = 0, ///< Expected to fold away in lowering.
283 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
284 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
285 };
286
287 /// Estimate the cost of a GEP operation when lowered.
288 ///
289 /// \p PointeeType is the source element type of the GEP.
290 /// \p Ptr is the base pointer operand.
291 /// \p Operands is the list of indices following the base pointer.
292 ///
293 /// \p AccessType is a hint as to what type of memory might be accessed by
294 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
295 /// folded into the addressing mode of a load/store. If AccessType is null,
296 /// then the resulting target type based off of PointeeType will be used as an
297 /// approximation.
299 getGEPCost(Type *PointeeType, const Value *Ptr,
300 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
302
303 /// Describe known properties for a set of pointers.
305 /// All the GEPs in a set have same base address.
306 unsigned IsSameBaseAddress : 1;
307 /// These properties are only valid if IsSameBaseAddress is set.
308 /// True if all pointers are separated by a unit stride.
309 unsigned IsUnitStride : 1;
310 /// True if the distance between any two neighbouring pointers is a known value.
311 unsigned IsKnownStride : 1;
312 unsigned Reserved : 29;
313
314 bool isSameBase() const { return IsSameBaseAddress; }
315 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
317
319 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
320 /*IsKnownStride=*/1, 0};
321 }
323 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
324 /*IsKnownStride=*/1, 0};
325 }
327 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
328 /*IsKnownStride=*/0, 0};
329 }
330 };
331 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
332
333 /// Estimate the cost of a chain of pointers (typically pointer operands of a
334 /// chain of loads or stores within same block) operations set when lowered.
335 /// \p AccessTy is the type of the loads/stores that will ultimately use the
336 /// \p Ptrs.
339 const PointersChainInfo &Info, Type *AccessTy,
341
342 ) const;
343
344 /// \returns A value by which our inlining threshold should be multiplied.
345 /// This is primarily used to bump up the inlining threshold wholesale on
346 /// targets where calls are unusually expensive.
347 ///
348 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
349 /// individual classes of instructions would be better.
350 unsigned getInliningThresholdMultiplier() const;
351
354
355 /// \returns A value to be added to the inlining threshold.
356 unsigned adjustInliningThreshold(const CallBase *CB) const;
357
358 /// \returns The cost of having an Alloca in the caller if not inlined, to be
359 /// added to the threshold
360 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
361
362 /// \returns Vector bonus in percent.
363 ///
364 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
365 /// and apply this bonus based on the percentage of vector instructions. A
366 /// bonus is applied if the vector instructions exceed 50% and half that
367 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
368 /// arbitrary and evolved over time by accident as much as because they are
369 /// principled bonuses.
370 /// FIXME: It would be nice to base the bonus values on something more
371 /// scientific. A target may have no bonus on vector instructions.
373
374 /// \return the expected cost of a memcpy, which could e.g. depend on the
375 /// source/destination type and alignment and the number of bytes copied.
377
378 /// Returns the maximum memset / memcpy size in bytes that still makes it
379 /// profitable to inline the call.
381
382 /// \return The estimated number of case clusters when lowering \p 'SI'.
383 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
384 /// table.
386 unsigned &JTSize,
388 BlockFrequencyInfo *BFI) const;
389
390 /// Estimate the cost of a given IR user when lowered.
391 ///
392 /// This can estimate the cost of either a ConstantExpr or Instruction when
393 /// lowered.
394 ///
395 /// \p Operands is a list of operands which can be a result of transformations
396 /// of the current operands. The number of the operands on the list must equal
397 /// the number of the current operands the IR user has. Their order on the
398 /// list must be the same as the order of the current operands the IR user
399 /// has.
400 ///
401 /// The returned cost is defined in terms of \c TargetCostConstants, see its
402 /// comments for a detailed explanation of the cost values.
406
407 /// This is a helper function which calls the three-argument
408 /// getInstructionCost with \p Operands which are the current operands U has.
410 TargetCostKind CostKind) const {
411 SmallVector<const Value *, 4> Operands(U->operand_values());
413 }
414
415 /// If a branch or a select condition is skewed in one direction by more than
416 /// this factor, it is very likely to be predicted correctly.
418
419 /// Returns the estimated penalty of a branch misprediction in latency. Indicates
420 /// how aggressively the target wants unpredictable branches to be eliminated. A
421 /// zero return value means extra optimization applied to them should be
422 /// minimal.
424
425 /// Return true if branch divergence exists.
426 ///
427 /// Branch divergence has a significantly negative impact on GPU performance
428 /// when threads in the same wavefront take different paths due to conditional
429 /// branches.
430 ///
431 /// If \p F is passed, provides a context function. If \p F is known to only
432 /// execute in a single threaded environment, the target may choose to skip
433 /// uniformity analysis and assume all values are uniform.
434 bool hasBranchDivergence(const Function *F = nullptr) const;
435
436 /// Returns whether V is a source of divergence.
437 ///
438 /// This function provides the target-dependent information for
439 /// the target-independent UniformityAnalysis.
440 bool isSourceOfDivergence(const Value *V) const;
441
442 /// Returns true for the target-specific
443 /// set of operations which produce a uniform result
444 /// even when taking non-uniform arguments.
445 bool isAlwaysUniform(const Value *V) const;
446
447 /// Query the target whether the specified address space cast from FromAS to
448 /// ToAS is valid.
449 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
450
451 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
452 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
453
454 /// Returns the address space ID for a target's 'flat' address space. Note
455 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
456 /// refers to as the generic address space. The flat address space is a
457 /// generic address space that can be used to access multiple segments of memory
458 /// with different address spaces. Access of a memory location through a
459 /// pointer with this address space is expected to be legal but slower
460 /// compared to the same memory location accessed through a pointer with a
461 /// different address space.
462 ///
463 /// This is for targets with different pointer representations which can
464 /// be converted with the addrspacecast instruction. If a pointer is converted
465 /// to this address space, optimizations should attempt to replace the access
466 /// with the source address space.
467 ///
468 /// \returns ~0u if the target does not have such a flat address space to
469 /// optimize away.
470 unsigned getFlatAddressSpace() const;
471
472 /// Return any intrinsic address operand indexes which may be rewritten if
473 /// they use a flat address space pointer.
474 ///
475 /// \returns true if the intrinsic was handled.
477 Intrinsic::ID IID) const;
478
479 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
480
481 /// Return true if globals in this address space can have initializers other
482 /// than `undef`.
484
485 unsigned getAssumedAddrSpace(const Value *V) const;
486
487 bool isSingleThreaded() const;
488
489 std::pair<const Value *, unsigned>
490 getPredicatedAddrSpace(const Value *V) const;
491
492 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
493 /// NewV, which has a different address space. This should happen for every
494 /// operand index that collectFlatAddressOperands returned for the intrinsic.
495 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
496 /// new value (which may be the original \p II with modified operands).
498 Value *NewV) const;
499
500 /// Test whether calls to a function lower to actual program function
501 /// calls.
502 ///
503 /// The idea is to test whether the program is likely to require a 'call'
504 /// instruction or equivalent in order to call the given function.
505 ///
506 /// FIXME: It's not clear that this is a good or useful query API. Clients
507 /// should probably move to simpler cost metrics using the above.
508 /// Alternatively, we could split the cost interface into distinct code-size
509 /// and execution-speed costs. This would allow modelling the core of this
510 /// query more accurately as a call is a single small instruction, but
511 /// incurs significant execution cost.
512 bool isLoweredToCall(const Function *F) const;
513
514 struct LSRCost { // Components of an LSR (loop-strength-reduction) cost; see isLSRCostLess().
515 /// TODO: Some of these could be merged. Also, a lexical ordering
516 /// isn't always optimal.
517 unsigned Insns;
518 unsigned NumRegs;
519 unsigned AddRecCost;
520 unsigned NumIVMuls;
521 unsigned NumBaseAdds;
522 unsigned ImmCost;
523 unsigned SetupCost;
524 unsigned ScaleCost;
525 };
526
527 /// Parameters that control the generic loop unrolling transformation.
529 /// The cost threshold for the unrolled loop. Should be relative to the
530 /// getInstructionCost values returned by this API, and the expectation is
531 /// that the unrolled loop's instructions when run through that interface
532 /// should not exceed this cost. However, this is only an estimate. Also,
533 /// specific loops may be unrolled even with a cost above this threshold if
534 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
535 /// restriction.
536 unsigned Threshold;
537 /// If complete unrolling will reduce the cost of the loop, we will boost
538 /// the Threshold by a certain percent to allow more aggressive complete
539 /// unrolling. This value provides the maximum boost percentage that we
540 /// can apply to Threshold (The value should be no less than 100).
541 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
542 /// MaxPercentThresholdBoost / 100)
543 /// E.g. if complete unrolling reduces the loop execution time by 50%
544 /// then we boost the threshold by the factor of 2x. If unrolling is not
545 /// expected to reduce the running time, then we do not increase the
546 /// threshold.
548 /// The cost threshold for the unrolled loop when optimizing for size (set
549 /// to UINT_MAX to disable).
551 /// The cost threshold for the unrolled loop, like Threshold, but used
552 /// for partial/runtime unrolling (set to UINT_MAX to disable).
554 /// The cost threshold for the unrolled loop when optimizing for size, like
555 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
556 /// UINT_MAX to disable).
558 /// A forced unrolling factor (the number of concatenated bodies of the
559 /// original loop in the unrolled loop body). When set to 0, the unrolling
560 /// transformation will select an unrolling factor based on the current cost
561 /// threshold and other factors.
562 unsigned Count;
563 /// Default unroll count for loops with run-time trip count.
565 // Set the maximum unrolling factor. The unrolling factor may be selected
566 // using the appropriate cost threshold, but may not exceed this number
567 // (set to UINT_MAX to disable). This does not apply in cases where the
568 // loop is being fully unrolled.
569 unsigned MaxCount;
570 /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
571 /// to be overridden by a target gives more flexibility in certain cases.
572 /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
574 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
575 /// applies even if full unrolling is selected. This allows a target to fall
576 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
578 // Represents number of instructions optimized when "back edge"
579 // becomes "fall through" in unrolled loop.
580 // For now we count a conditional branch on a backedge and a comparison
581 // feeding it.
582 unsigned BEInsns;
583 /// Allow partial unrolling (unrolling of loops to expand the size of the
584 /// loop body, not only to eliminate small constant-trip-count loops).
586 /// Allow runtime unrolling (unrolling of loops to expand the size of the
587 /// loop body even when the number of loop iterations is not known at
588 /// compile time).
590 /// Allow generation of a loop remainder (extra iterations after unroll).
592 /// Allow emitting expensive instructions (such as divisions) when computing
593 /// the trip count of a loop for runtime unrolling.
595 /// Apply loop unroll on any kind of loop
596 /// (mainly to loops that fail runtime unrolling).
597 bool Force;
598 /// Allow using trip count upper bound to unroll loops.
600 /// Allow unrolling of all the iterations of the runtime loop remainder.
602 /// Allow unroll and jam. Used to enable unroll and jam for the target.
604 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
605 /// value above is used during unroll and jam for the outer loop size.
606 /// This value is used in the same manner to limit the size of the inner
607 /// loop.
609 /// Don't allow loop unrolling to simulate more than this number of
610 /// iterations when checking full unroll profitability
612 /// Don't disable runtime unroll for the loops which were vectorized.
614 };
615
616 /// Get target-customized preferences for the generic loop unrolling
617 /// transformation. The caller will initialize UP with the current
618 /// target-independent defaults.
621 OptimizationRemarkEmitter *ORE) const;
622
623 /// Query the target whether it would be profitable to convert the given loop
624 /// into a hardware loop.
627 HardwareLoopInfo &HWLoopInfo) const;
628
629 /// Query the target whether it would be preferred to create a predicated
630 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
632
633 /// Query the target what the preferred style of tail folding is.
634 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
635 /// may (or will never) overflow for the suggested VF/UF in the given loop.
636 /// Targets can use this information to select a more optimal tail folding
637 /// style. The value conservatively defaults to true, such that no assumptions
638 /// are made on overflow.
640 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
641
642 // Parameters that control the loop peeling transformation
644 /// A forced peeling factor (the number of bodies of the original loop
645 /// that should be peeled off before the loop body). When set to 0, a
646 /// peeling factor is chosen based on profile information and other factors.
647 unsigned PeelCount;
648 /// Allow peeling off loop iterations.
650 /// Allow peeling off loop iterations for loop nests.
652 /// Allow peeling based on profile. Used to enable peeling off all
653 /// iterations based on the provided profile.
654 /// If the value is true, the peeling cost model can decide to peel only
655 /// some iterations, and in this case it will set this to false.
657 };
658
659 /// Get target-customized preferences for the generic loop peeling
660 /// transformation. The caller will initialize \p PP with the current
661 /// target-independent defaults with information from \p L and \p SE.
663 PeelingPreferences &PP) const;
664
665 /// Targets can implement their own combinations for target-specific
666 /// intrinsics. This function will be called from the InstCombine pass every
667 /// time a target-specific intrinsic is encountered.
668 ///
669 /// \returns std::nullopt to not do anything target specific or a value that
670 /// will be returned from the InstCombiner. It is possible to return null and
671 /// stop further processing of the intrinsic by returning nullptr.
672 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
673 IntrinsicInst & II) const;
674 /// Can be used to implement target-specific instruction combining.
675 /// \see instCombineIntrinsic
676 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
677 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
678 KnownBits & Known, bool &KnownBitsComputed) const;
679 /// Can be used to implement target-specific instruction combining.
680 /// \see instCombineIntrinsic
681 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
682 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
683 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
684 std::function<void(Instruction *, unsigned, APInt, APInt &)>
685 SimplifyAndSetOp) const;
686 /// @}
687
688 /// \name Scalar Target Information
689 /// @{
690
691 /// Flags indicating the kind of support for population count.
692 ///
693 /// Compared to the SW implementation, HW support is supposed to
694 /// significantly boost the performance when the population is dense, and it
695 /// may or may not degrade performance if the population is sparse. A HW
696 /// support is considered as "Fast" if it can outperform, or is on a par
697 /// with, SW implementation when the population is sparse; otherwise, it is
698 /// considered as "Slow".
700
701 /// Return true if the specified immediate is legal add immediate, that
702 /// is the target has add instructions which can add a register with the
703 /// immediate without having to materialize the immediate into a register.
704 bool isLegalAddImmediate(int64_t Imm) const;
705
706 /// Return true if adding the specified scalable immediate is legal, that is
707 /// the target has add instructions which can add a register with the
708 /// immediate (multiplied by vscale) without having to materialize the
709 /// immediate into a register.
710 bool isLegalAddScalableImmediate(int64_t Imm) const;
711
712 /// Return true if the specified immediate is legal icmp immediate,
713 /// that is the target has icmp instructions which can compare a register
714 /// against the immediate without having to materialize the immediate into a
715 /// register.
716 bool isLegalICmpImmediate(int64_t Imm) const;
717
718 /// Return true if the addressing mode represented by AM is legal for
719 /// this target, for a load/store of the specified type.
720 /// The type may be VoidTy, in which case only return true if the addressing
721 /// mode is legal for a load/store of any legal type.
722 /// If target returns true in LSRWithInstrQueries(), I may be valid.
723 /// \param ScalableOffset represents a quantity of bytes multiplied by vscale,
724 /// an invariant value known only at runtime. Most targets should not accept
725 /// a scalable offset.
726 ///
727 /// TODO: Handle pre/postinc as well.
728 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
729 bool HasBaseReg, int64_t Scale,
730 unsigned AddrSpace = 0, Instruction *I = nullptr,
731 int64_t ScalableOffset = 0) const;
732
733 /// Return true if LSR cost of C1 is lower than C2.
735 const TargetTransformInfo::LSRCost &C2) const;
736
737 /// Return true if LSR major cost is number of registers. Targets which
738 /// implement their own isLSRCostLess and unset number of registers as major
739 /// cost should return false, otherwise return true.
740 bool isNumRegsMajorCostOfLSR() const;
741
742 /// Return true if LSR should drop a found solution if it's calculated to be
743 /// less profitable than the baseline.
745
746 /// \returns true if LSR should not optimize a chain that includes \p I.
748
749 /// Return true if the target can fuse a compare and branch.
750 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
751 /// calculation for the instructions in a loop.
752 bool canMacroFuseCmp() const;
753
754 /// Return true if the target can save a compare for loop count, for example
755 /// hardware loop saves a compare.
756 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
758 TargetLibraryInfo *LibInfo) const;
759
764 };
765
766 /// Return the preferred addressing mode LSR should make efforts to generate.
768 ScalarEvolution *SE) const;
769
770 /// Return true if the target supports masked store.
771 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
772 /// Return true if the target supports masked load.
773 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
774
775 /// Return true if the target supports nontemporal store.
776 bool isLegalNTStore(Type *DataType, Align Alignment) const;
777 /// Return true if the target supports nontemporal load.
778 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
779
780 /// \returns true if the target supports broadcasting a load to a vector of
781 /// type <NumElements x ElementTy>.
782 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
783
784 /// Return true if the target supports masked scatter.
785 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
786 /// Return true if the target supports masked gather.
787 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
788 /// Return true if the target forces scalarizing of llvm.masked.gather
789 /// intrinsics.
790 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
791 /// Return true if the target forces scalarizing of llvm.masked.scatter
792 /// intrinsics.
793 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
794
795 /// Return true if the target supports masked compress store.
796 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const;
797 /// Return true if the target supports masked expand load.
798 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
799
800 /// Return true if the target supports strided load.
801 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
802
803 /// Return true if the target supports masked vector histograms.
804 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const;
805
806 /// Return true if this is an alternating opcode pattern that can be lowered
807 /// to a single instruction on the target. In X86 this is for the addsub
808 /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
809 /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
810 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
811 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
812 /// \p VecTy is the vector type of the instruction to be generated.
813 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
814 const SmallBitVector &OpcodeMask) const;
815
816 /// Return true if we should be enabling ordered reductions for the target.
817 bool enableOrderedReductions() const;
818
819 /// Return true if the target has a unified operation to calculate division
820 /// and remainder. If so, the additional implicit multiplication and
821 /// subtraction required to calculate a remainder from division are free. This
822 /// can enable more aggressive transformations for division and remainder than
823 /// would typically be allowed using throughput or size cost models.
824 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
825
826 /// Return true if the given instruction (assumed to be a memory access
827 /// instruction) has a volatile variant. If that's the case then we can avoid
828 /// addrspacecast to generic AS for volatile loads/stores. Default
829 /// implementation returns false, which prevents address space inference for
830 /// volatile loads/stores.
831 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
832
833 /// Return true if target doesn't mind addresses in vectors.
834 bool prefersVectorizedAddressing() const;
835
836 /// Return the cost of the scaling factor used in the addressing
837 /// mode represented by AM for this target, for a load/store
838 /// of the specified type.
839 /// If the AM is supported, the return value must be >= 0.
840 /// If the AM is not supported, it returns a negative value.
841 /// TODO: Handle pre/postinc as well.
843 StackOffset BaseOffset, bool HasBaseReg,
844 int64_t Scale,
845 unsigned AddrSpace = 0) const;
846
847 /// Return true if the loop strength reduce pass should make
848 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
849 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
850 /// immediate offset and no index register.
851 bool LSRWithInstrQueries() const;
852
853 /// Return true if it's free to truncate a value of type Ty1 to type
854 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
855 /// by referencing its sub-register AX.
856 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
857
858 /// Return true if it is profitable to hoist an instruction from the
859 /// then/else blocks to before the if.
860 bool isProfitableToHoist(Instruction *I) const;
861
862 bool useAA() const;
863
864 /// Return true if this type is legal.
865 bool isTypeLegal(Type *Ty) const;
866
867 /// Returns the estimated number of registers required to represent \p Ty.
868 unsigned getRegUsageForType(Type *Ty) const;
869
870 /// Return true if switches should be turned into lookup tables for the
871 /// target.
872 bool shouldBuildLookupTables() const;
873
874 /// Return true if switches should be turned into lookup tables
875 /// containing this constant value for the target.
877
878 /// Return true if lookup tables should be turned into relative lookup tables.
879 bool shouldBuildRelLookupTables() const;
880
881 /// Return true if the input function which is cold at all call sites,
882 /// should use coldcc calling convention.
883 bool useColdCCForColdCall(Function &F) const;
884
885 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
886 /// are set if the demanded result elements need to be inserted and/or
887 /// extracted from vectors.
889 const APInt &DemandedElts,
890 bool Insert, bool Extract,
892
893 /// Estimate the overhead of scalarizing an instruction's unique
894 /// non-constant operands. The (potentially vector) types to use for each
895 /// argument are passed via Tys.
900
901 /// If target has efficient vector element load/store instructions, it can
902 /// return true here so that insertion/extraction costs are not added to
903 /// the scalarization cost of a load/store.
905
906 /// If the target supports tail calls.
907 bool supportsTailCalls() const;
908
909 /// If target supports tail call on \p CB
910 bool supportsTailCallFor(const CallBase *CB) const;
911
912 /// Don't restrict interleaved unrolling to small loops.
913 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
914
915 /// Returns options for expansion of memcmp. IsZeroCmp is
916 /// true if this is the expansion of memcmp(p1, p2, s) == 0.
918 // Memcmp expansion counts as enabled once at least one load is permitted.
919 operator bool() const { return MaxNumLoads != 0; }
920
921 // Maximum number of load operations.
922 unsigned MaxNumLoads = 0;
923
924 // The list of available load sizes (in bytes), sorted in decreasing order.
926
927 // For memcmp expansion when the memcmp result is only compared equal or
928 // not-equal to 0, allow up to this number of load pairs per block. As an
929 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
930 // a0 = load2bytes &a[0]
931 // b0 = load2bytes &b[0]
932 // a2 = load1byte &a[2]
933 // b2 = load1byte &b[2]
934 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
935 unsigned NumLoadsPerBlock = 1;
936
937 // Set to true to allow overlapping loads. For example, 7-byte compares can
938 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
939 // requires all loads in LoadSizes to be doable in an unaligned way.
941
942 // Sometimes, the amount of data that needs to be compared is smaller than
943 // the standard register size, but it cannot be loaded with just one load
944 // instruction. For example, if the size of the memory comparison is 6
945 // bytes, we can handle it more efficiently by loading all 6 bytes in a
946 // single block and generating an 8-byte number, instead of generating two
947 // separate blocks with conditional jumps for 4 and 2 byte loads. This
948 // approach simplifies the process and produces the comparison result as
949 // normal. This array lists the allowed sizes of memcmp tails that can be
950 // merged into one block
952 };
954 bool IsZeroCmp) const;
955
956 /// Should the Select Optimization pass be enabled and run.
957 bool enableSelectOptimize() const;
958
959 /// Should the Select Optimization pass treat the given instruction like a
960 /// select, potentially converting it to a conditional branch. This can
961 /// include select-like instructions like or(zext(c), x) that can be converted
962 /// to selects.
964
965 /// Enable matching of interleaved access groups.
967
968 /// Enable matching of interleaved access groups that contain predicated
969 /// accesses or gaps and are therefore vectorized using masked
970 /// vector loads/stores.
972
973 /// Indicate that it is potentially unsafe to automatically vectorize
974 /// floating-point operations because vector and scalar floating-point
975 /// semantics may differ. For example, ARM NEON v7 SIMD math
976 /// does not support IEEE-754 denormal numbers, while depending on the
977 /// platform, scalar floating-point math does.
978 /// This applies to floating-point math operations and calls, not memory
979 /// operations, shuffles, or casts.
981
982 /// Determine if the target supports unaligned memory accesses.
984 unsigned AddressSpace = 0,
985 Align Alignment = Align(1),
986 unsigned *Fast = nullptr) const;
987
988 /// Return hardware support for population count.
989 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
990
991 /// Return true if the hardware has a fast square-root instruction.
992 bool haveFastSqrt(Type *Ty) const;
993
994 /// Return true if the cost of the instruction is too high to speculatively
995 /// execute and should be kept behind a branch.
996 /// This normally just wraps around a getInstructionCost() call, but some
997 /// targets might report a low TCK_SizeAndLatency value that is incompatible
998 /// with the fixed TCC_Expensive value.
999 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
1001
1002 /// Return true if it is faster to check if a floating-point value is NaN
1003 /// (or not-NaN) versus a comparison against a constant FP zero value.
1004 /// Targets should override this if materializing a 0.0 for comparison is
1005 /// generally as cheap as checking for ordered/unordered.
1006 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
1007
1008 /// Return the expected cost of supporting the floating point operation
1009 /// of the specified type.
1010 InstructionCost getFPOpCost(Type *Ty) const;
1011
1012 /// Return the expected cost of materialization for the given integer
1013 /// immediate of the specified type.
1014 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1015 TargetCostKind CostKind) const;
1016
1017 /// Return the expected cost of materialization for the given integer
1018 /// immediate of the specified type for a given instruction. The cost can be
1019 /// zero if the immediate can be folded into the specified instruction.
1020 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1021 const APInt &Imm, Type *Ty,
1023 Instruction *Inst = nullptr) const;
1025 const APInt &Imm, Type *Ty,
1026 TargetCostKind CostKind) const;
1027
1028 /// Return the expected cost for the given integer when optimising
1029 /// for size. This is different than the other integer immediate cost
1030 /// functions in that it is subtarget agnostic. This is useful when you e.g.
1031 /// target one ISA such as AArch32 but smaller encodings could be possible
1032 /// with another such as Thumb. This return value is used as a penalty when
1033 /// the total cost for a constant is calculated (the bigger the cost, the
1034 /// more beneficial constant hoisting is).
1035 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1036 const APInt &Imm, Type *Ty) const;
1037
1038 /// It can be advantageous to detach complex constants from their uses to make
1039 /// their generation cheaper. This hook allows targets to report when such
1040 /// transformations might negatively affect the code generation of the
1041 /// underlying operation. The motivating example is divides whereby hoisting
1042 /// constants prevents the code generator's ability to transform them into
1043 /// combinations of simpler operations.
1045 const Function &Fn) const;
1046
1047 /// @}
1048
1049 /// \name Vector Target Information
1050 /// @{
1051
1052 /// The various kinds of shuffle patterns for vector queries.
1054 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1055 SK_Reverse, ///< Reverse the order of the vector.
1056 SK_Select, ///< Selects elements from the corresponding lane of
1057 ///< either source operand. This is equivalent to a
1058 ///< vector select with a constant condition operand.
1059 SK_Transpose, ///< Transpose two vectors.
1060 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1061 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1062 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1063 ///< with any shuffle mask.
1064 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1065 ///< shuffle mask.
1066 SK_Splice ///< Concatenates elements from the first input vector
1067 ///< with elements of the second input vector. Returning
1068 ///< a vector of the same type as the input vectors.
1069 ///< Index indicates start offset in first input vector.
1071
1072 /// Additional information about an operand's possible values.
1074 OK_AnyValue, // Operand can have any value.
1075 OK_UniformValue, // Operand is uniform (splat of a value).
1076 OK_UniformConstantValue, // Operand is uniform constant.
1077 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1079
1080 /// Additional properties of an operand's values.
1085 };
1086
1087 // Describe the values an operand can take. We're in the process
1088 // of migrating uses of OperandValueKind and OperandValueProperties
1089 // to use this class, and then will change the internal representation.
1093
1094 bool isConstant() const {
1096 }
1097 bool isUniform() const {
1099 }
1100 bool isPowerOf2() const { // Does the recorded property equal OP_PowerOf2?
1101 return OP_PowerOf2 == Properties;
1102 }
1103 bool isNegatedPowerOf2() const {
1105 }
1106
1108 return {Kind, OP_None};
1109 }
1110 };
1111
1112 /// \return the number of registers in the target-provided register class.
1113 unsigned getNumberOfRegisters(unsigned ClassID) const;
1114
1115 /// \return true if the target supports load/store that enables fault
1116 /// suppression of memory operands when the source condition is false.
1117 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const;
1118
1119 /// \return the target-provided register class ID for the provided type,
1120 /// accounting for type promotion and other type-legalization techniques that
1121 /// the target might apply. However, it specifically does not account for the
1122 /// scalarization or splitting of vector types. Should a vector type require
1123 /// scalarization or splitting into multiple underlying vector registers, that
1124 /// type should be mapped to a register class containing no registers.
1125 /// Specifically, this is designed to provide a simple, high-level view of the
1126 /// register allocation later performed by the backend. These register classes
1127 /// don't necessarily map onto the register classes used by the backend.
1128 /// FIXME: It's not currently possible to determine how many registers
1129 /// are used by the provided type.
1130 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1131
1132 /// \return the target-provided register class name
1133 const char *getRegisterClassName(unsigned ClassID) const;
1134
1136
1137 /// \return The width of the largest scalar or vector register type.
1139
1140 /// \return The width of the smallest vector register type.
1141 unsigned getMinVectorRegisterBitWidth() const;
1142
1143 /// \return The maximum value of vscale if the target specifies an
1144 /// architectural maximum vector length, and std::nullopt otherwise.
1145 std::optional<unsigned> getMaxVScale() const;
1146
1147 /// \return the value of vscale to tune the cost model for.
1148 std::optional<unsigned> getVScaleForTuning() const;
1149
1150 /// \return true if vscale is known to be a power of 2
1151 bool isVScaleKnownToBeAPowerOfTwo() const;
1152
1153 /// \return True if the vectorization factor should be chosen to
1154 /// make the vector of the smallest element type match the size of a
1155 /// vector register. For wider element types, this could result in
1156 /// creating vectors that span multiple vector registers.
1157 /// If false, the vectorization factor will be chosen based on the
1158 /// size of the widest element type.
1159 /// \p K Register Kind for vectorization.
1161
1162 /// \return The minimum vectorization factor for types of given element
1163 /// bit width, or 0 if there is no minimum VF. The returned value only
1164 /// applies when shouldMaximizeVectorBandwidth returns true.
1165 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1166 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1167
1168 /// \return The maximum vectorization factor for types of given element
1169 /// bit width and opcode, or 0 if there is no maximum VF.
1170 /// Currently only used by the SLP vectorizer.
1171 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1172
1173 /// \return The minimum vectorization factor for the store instruction. Given
1174 /// the initial estimation of the minimum vector factor and store value type,
1175 /// it tries to find possible lowest VF, which still might be profitable for
1176 /// the vectorization.
1177 /// \param VF Initial estimation of the minimum vector factor.
1178 /// \param ScalarMemTy Scalar memory type of the store operation.
1179 /// \param ScalarValTy Scalar type of the stored value.
1180 /// Currently only used by the SLP vectorizer.
1181 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1182 Type *ScalarValTy) const;
1183
1184 /// \return True if it should be considered for address type promotion.
1185 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1186 /// profitable without finding other extensions fed by the same input.
1188 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1189
1190 /// \return The size of a cache line in bytes.
1191 unsigned getCacheLineSize() const;
1192
1193 /// The possible cache levels
1194 enum class CacheLevel {
1195 L1D, ///< The L1 data cache.
1196 L2D, ///< The L2 data cache.
1197
1198 // L3 caches are deliberately not modeled: their sizes differ widely between
1199 // microarchitectures, and there is currently no use for L3 cache size
1200 // modeling.
1201 };
1202
1203 /// \return The size of the cache level in bytes, if available.
1204 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1205
1206 /// \return The associativity of the cache level, if available.
1207 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1208
1209 /// \return The minimum architectural page size for the target.
1210 std::optional<unsigned> getMinPageSize() const;
1211
1212 /// \return How much before a load we should place the prefetch
1213 /// instruction. This is currently measured in number of
1214 /// instructions.
1215 unsigned getPrefetchDistance() const;
1216
1217 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1218 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1219 /// and the arguments provided are meant to serve as a basis for deciding this
1220 /// for a particular loop.
1221 ///
1222 /// \param NumMemAccesses Number of memory accesses in the loop.
1223 /// \param NumStridedMemAccesses Number of the memory accesses that
1224 /// ScalarEvolution could find a known stride
1225 /// for.
1226 /// \param NumPrefetches Number of software prefetches that will be
1227 /// emitted as determined by the addresses
1228 /// involved and the cache line size.
1229 /// \param HasCall True if the loop contains a call.
1230 ///
1231 /// \return This is the minimum stride in bytes where it makes sense to start
1232 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1233 /// stride.
1234 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1235 unsigned NumStridedMemAccesses,
1236 unsigned NumPrefetches, bool HasCall) const;
1237
1238 /// \return The maximum number of iterations to prefetch ahead. If
1239 /// the required number of iterations is more than this number, no
1240 /// prefetching is performed.
1241 unsigned getMaxPrefetchIterationsAhead() const;
1242
1243 /// \return True if prefetching should also be done for writes.
1244 bool enableWritePrefetching() const;
1245
1246 /// \return True if the target wants to issue a prefetch in address space \p AS.
1247 bool shouldPrefetchAddressSpace(unsigned AS) const;
1248
1249 /// \return The maximum interleave factor that any transform should try to
1250 /// perform for this target. This number depends on the level of parallelism
1251 /// and the number of execution units in the CPU.
1252 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1253
1254 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1255 static OperandValueInfo getOperandInfo(const Value *V);
1256
1257 /// This is an approximation of reciprocal throughput of a math/logic op.
1258 /// A higher cost indicates less expected throughput.
1259 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1260 /// clock cycles per instruction when the instructions are not part of a
1261 /// limiting dependency chain."
1262 /// Therefore, costs should be scaled to account for multiple execution units
1263 /// on the target that can process this type of instruction. For example, if
1264 /// there are 5 scalar integer units and 2 vector integer units that can
1265 /// calculate an 'add' in a single cycle, this model should indicate that the
1266 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1267 /// add instruction.
1268 /// \p Args is an optional argument which holds the instruction operands
1269 /// values so the TTI can analyze those values searching for special
1270 /// cases or optimizations based on those values.
1271 /// \p CxtI is the optional original context instruction, if one exists, to
1272 /// provide even more information.
1273 /// \p TLibInfo is used to search for platform specific vector library
1274 /// functions for instructions that might be converted to calls (e.g. frem).
1276 unsigned Opcode, Type *Ty,
1279 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1280 ArrayRef<const Value *> Args = std::nullopt,
1281 const Instruction *CxtI = nullptr,
1282 const TargetLibraryInfo *TLibInfo = nullptr) const;
1283
1284 /// Returns the cost estimation for alternating opcode pattern that can be
1285 /// lowered to a single instruction on the target. In X86 this is for the
1286 /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in
1287 /// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
1288 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1289 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
1290 /// \p VecTy is the vector type of the instruction to be generated.
1291 InstructionCost getAltInstrCost(
1292 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1293 const SmallBitVector &OpcodeMask,
1295
1296 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1297 /// The exact mask may be passed as Mask, or else the array will be empty.
1298 /// The index and subtype parameters are used by the subvector insertion and
1299 /// extraction shuffle kinds to show the insert/extract point and the type of
1300 /// the subvector being inserted/extracted. The operands of the shuffle can be
1301 /// passed through \p Args, which helps improve the cost estimation in some
1302 /// cases, like in broadcast loads.
1303 /// NOTE: For subvector extractions Tp represents the source type.
1304 InstructionCost getShuffleCost(
1305 ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
1307 VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
1308 const Instruction *CxtI = nullptr) const;
1309
1310 /// Represents a hint about the context in which a cast is used.
1311 ///
1312 /// For zext/sext, the context of the cast is the operand, which must be a
1313 /// load of some kind. For trunc, the context of the cast is the single
1314 /// user of the instruction, which must be a store of some kind.
1315 ///
1316 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1317 /// type of cast it's dealing with, as not every cast is equal. For instance,
1318 /// the zext of a load may be free, but the zext of an interleaving load can
1319 /// be (very) expensive!
1320 ///
1321 /// See \c getCastContextHint to compute a CastContextHint from a cast
1322 /// Instruction*. Callers can use it if they don't need to override the
1323 /// context and just want it to be calculated from the instruction.
1324 ///
1325 /// FIXME: This handles the types of load/store that the vectorizer can
1326 /// produce, which are the cases where the context instruction is most
1327 /// likely to be incorrect. There are other situations where that can happen
1328 /// too, which might be handled here but in the long run a more general
1329 /// solution of costing multiple instructions at the same time may be better.
1330 enum class CastContextHint : uint8_t {
1331 None, ///< The cast is not used with a load/store of any kind.
1332 Normal, ///< The cast is used with a normal load/store.
1333 Masked, ///< The cast is used with a masked load/store.
1334 GatherScatter, ///< The cast is used with a gather/scatter.
1335 Interleave, ///< The cast is used with an interleaved load/store.
1336 Reversed, ///< The cast is used with a reversed load/store.
1337 };
1338
1339 /// Calculates a CastContextHint from \p I.
1340 /// This should be used by callers of getCastInstrCost if they wish to
1341 /// determine the context from some instruction.
1342 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1343 /// or if it's another type of cast.
1345
1346 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1347 /// zext, etc. If there is an existing instruction that holds Opcode, it
1348 /// may be passed in the 'I' parameter.
1350 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1353 const Instruction *I = nullptr) const;
1354
1355 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1356 /// Index = -1 to indicate that there is no information about the index value.
1357 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1358 VectorType *VecTy,
1359 unsigned Index) const;
1360
1361 /// \return The expected cost of control-flow related instructions such as
1362 /// Phi, Ret, Br, Switch.
1364 getCFInstrCost(unsigned Opcode,
1366 const Instruction *I = nullptr) const;
1367
1368 /// \returns The expected cost of compare and select instructions. If there
1369 /// is an existing instruction that holds Opcode, it may be passed in the
1370 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1371 /// is using a compare with the specified predicate as condition. When vector
1372 /// types are passed, \p VecPred must be used for all lanes.
1374 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1375 CmpInst::Predicate VecPred,
1377 const Instruction *I = nullptr) const;
1378
1379 /// \return The expected cost of vector Insert and Extract.
1380 /// Use -1 to indicate that there is no information on the index value.
1381 /// This is used when the instruction is not available; a typical use
1382 /// case is to provision the cost of vectorization/scalarization in
1383 /// vectorizer passes.
1384 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1386 unsigned Index = -1, Value *Op0 = nullptr,
1387 Value *Op1 = nullptr) const;
1388
1389 /// \return The expected cost of vector Insert and Extract.
1390 /// This is used when instruction is available, and implementation
1391 /// asserts 'I' is not nullptr.
1392 ///
1393 /// A typical suitable use case is cost estimation when vector instruction
1394 /// exists (e.g., from basic blocks during transformation).
1397 unsigned Index = -1) const;
1398
1399 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1400 /// \p ReplicationFactor times.
1401 ///
1402 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1403 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1404 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1405 int VF,
1406 const APInt &DemandedDstElts,
1408
1409 /// \return The cost of Load and Store instructions.
1411 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1412 unsigned AddressSpace,
1414 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1415 const Instruction *I = nullptr) const;
1416
1417 /// \return The cost of VP Load and Store instructions.
1418 InstructionCost
1419 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1420 unsigned AddressSpace,
1422 const Instruction *I = nullptr) const;
1423
1424 /// \return The cost of masked Load and Store instructions.
1426 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1428
1429 /// \return The cost of Gather or Scatter operation
1430 /// \p Opcode - is a type of memory access Load or Store
1431 /// \p DataTy - a vector type of the data to be loaded or stored
1432 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1433 /// \p VariableMask - true when the memory access is predicated with a mask
1434 /// that is not a compile-time constant
1435 /// \p Alignment - alignment of single element
1436 /// \p I - the optional original context instruction, if one exists, e.g. the
1437 /// load/store to transform or the call to the gather/scatter intrinsic
1439 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1441 const Instruction *I = nullptr) const;
1442
1443 /// \return The cost of strided memory operations.
1444 /// \p Opcode - is a type of memory access Load or Store
1445 /// \p DataTy - a vector type of the data to be loaded or stored
1446 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1447 /// \p VariableMask - true when the memory access is predicated with a mask
1448 /// that is not a compile-time constant
1449 /// \p Alignment - alignment of single element
1450 /// \p I - the optional original context instruction, if one exists, e.g. the
1451 /// load/store to transform or the call to the gather/scatter intrinsic
1453 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1455 const Instruction *I = nullptr) const;
1456
1457 /// \return The cost of the interleaved memory operation.
1458 /// \p Opcode is the memory operation code
1459 /// \p VecTy is the vector type of the interleaved access.
1460 /// \p Factor is the interleave factor
1461 /// \p Indices is the indices for interleaved load members (as interleaved
1462 /// load allows gaps)
1463 /// \p Alignment is the alignment of the memory operation
1464 /// \p AddressSpace is address space of the pointer.
1465 /// \p UseMaskForCond indicates if the memory access is predicated.
1466 /// \p UseMaskForGaps indicates if gaps should be masked.
1468 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1469 Align Alignment, unsigned AddressSpace,
1471 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1472
1473 /// Helper that reports whether a reduction has to be performed in order:
1474 /// true only when \p FMF holds flags that do not allow reassociation.
1475 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1476 return FMF.has_value() && !FMF->allowReassoc();
1477 }
1478
1479 /// Calculate the cost of vector reduction intrinsics.
1480 ///
1481 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1482 /// value using the operation denoted by \p Opcode. The FastMathFlags
1483 /// parameter \p FMF indicates what type of reduction we are performing:
1484 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1485 /// involves successively splitting a vector into half and doing the
1486 /// operation on the pair of halves until you have a scalar value. For
1487 /// example:
1488 /// (v0, v1, v2, v3)
1489 /// ((v0+v2), (v1+v3), undef, undef)
1490 /// ((v0+v2+v1+v3), undef, undef, undef)
1491 /// This is the default behaviour for integer operations, whereas for
1492 /// floating point we only do this if \p FMF indicates that
1493 /// reassociation is allowed.
1494 /// 2. Ordered. For a vector with N elements this involves performing N
1495 /// operations in lane order, starting with an initial scalar value, i.e.
1496 /// result = InitVal + v0
1497 /// result = result + v1
1498 /// result = result + v2
1499 /// result = result + v3
1500 /// This is only the case for FP operations and when reassociation is not
1501 /// allowed.
1502 ///
1504 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1506
1510
1511 /// Calculate the cost of an extended reduction pattern, similar to
1512 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1513 /// extensions. This is the cost of patterns such as:
1514 /// ResTy vecreduce.add(mul (A, B)).
1515 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1517 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1519
1520 /// Calculate the cost of an extended reduction pattern, similar to
1521 /// getArithmeticReductionCost of a reduction with an extension.
1522 /// This is the cost of a pattern such as:
1523 /// ResTy vecreduce.opcode(ext(Ty A)).
1525 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1526 FastMathFlags FMF,
1528
1529 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1530 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1531 /// 3. scalar instruction which is to be vectorized.
1534
1535 /// \returns The cost of Call instructions.
1539
1540 /// \returns The number of pieces into which the provided type must be
1541 /// split during legalization. Zero is returned when the answer is unknown.
1542 unsigned getNumberOfParts(Type *Tp) const;
1543
1544 /// \returns The cost of the address computation. For most targets this can be
1545 /// merged into the instruction indexing mode. Some targets might want to
1546 /// distinguish between address computation for memory operations on vector
1547 /// types and scalar types. Such targets should override this function.
1548 /// The 'SE' parameter holds pointer for the scalar evolution object which
1549 /// is used in order to get the Ptr step value in case of constant stride.
1550 /// The 'Ptr' parameter holds SCEV of the access pointer.
1552 ScalarEvolution *SE = nullptr,
1553 const SCEV *Ptr = nullptr) const;
1554
1555 /// \returns The cost, if any, of keeping values of the given types alive
1556 /// over a callsite.
1557 ///
1558 /// Some types may require the use of register classes that do not have
1559 /// any callee-saved registers, so would require a spill and fill.
1561
1562 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1563 /// will contain additional information - whether the intrinsic may write
1564 /// to or read from memory, volatility and the pointer. Info is undefined
1565 /// if false is returned.
1567
1568 /// \returns The maximum element size, in bytes, for an element
1569 /// unordered-atomic memory intrinsic.
1570 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1571
1572 /// \returns A value which is the result of the given memory intrinsic. New
1573 /// instructions may be created to extract the result from the given intrinsic
1574 /// memory operation. Returns nullptr if the target cannot create a result
1575 /// from the given intrinsic.
1577 Type *ExpectedType) const;
1578
1579 /// \returns The type to use in a loop expansion of a memcpy call.
1581 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1582 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
1583 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1584
1585 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1586 /// \param RemainingBytes The number of bytes to copy.
1587 ///
1588 /// Calculates the operand types to use when copying \p RemainingBytes of
1589 /// memory, where source and destination alignments are \p SrcAlign and
1590 /// \p DestAlign respectively.
1592 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1593 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1594 Align SrcAlign, Align DestAlign,
1595 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1596
1597 /// \returns True if the two functions have compatible attributes for inlining
1598 /// purposes.
1599 bool areInlineCompatible(const Function *Caller,
1600 const Function *Callee) const;
1601
1602 /// Returns a penalty for invoking call \p Call in \p F.
1603 /// For example, if a function F calls a function G, which in turn calls
1604 /// function H, then getInlineCallPenalty(F, H()) would return the
1605 /// penalty of calling H from F, e.g. after inlining G into F.
1606 /// \p DefaultCallPenalty is passed to give a default penalty that
1607 /// the target can amend or override.
1608 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1609 unsigned DefaultCallPenalty) const;
1610
1611 /// \returns True if the caller and callee agree on how \p Types will be
1612 /// passed to or returned from the callee.
1613 ///
1614 /// \param Types List of types to check.
1615 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1616 const ArrayRef<Type *> &Types) const;
1617
1618 /// The type of load/store indexing.
1620 MIM_Unindexed, ///< No indexing.
1621 MIM_PreInc, ///< Pre-incrementing.
1622 MIM_PreDec, ///< Pre-decrementing.
1623 MIM_PostInc, ///< Post-incrementing.
1624 MIM_PostDec ///< Post-decrementing.
1626
1627 /// \returns True if the specified indexed load for the given type is legal.
1628 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1629
1630 /// \returns True if the specified indexed store for the given type is legal.
1631 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1632
1633 /// \returns The bitwidth of the largest vector type that should be used to
1634 /// load/store in the given address space.
1635 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1636
1637 /// \returns True if the load instruction is legal to vectorize.
1638 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1639
1640 /// \returns True if the store instruction is legal to vectorize.
1641 bool isLegalToVectorizeStore(StoreInst *SI) const;
1642
1643 /// \returns True if it is legal to vectorize the given load chain.
1644 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1645 unsigned AddrSpace) const;
1646
1647 /// \returns True if it is legal to vectorize the given store chain.
1648 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1649 unsigned AddrSpace) const;
1650
1651 /// \returns True if it is legal to vectorize the given reduction kind.
1653 ElementCount VF) const;
1654
1655 /// \returns True if the given type is supported for scalable vectors.
1657
1658 /// \returns The new vector factor value if the target doesn't support \p
1659 /// SizeInBytes loads or has a better vector factor.
1660 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1661 unsigned ChainSizeInBytes,
1662 VectorType *VecTy) const;
1663
1664 /// \returns The new vector factor value if the target doesn't support \p
1665 /// SizeInBytes stores or has a better vector factor.
1666 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1667 unsigned ChainSizeInBytes,
1668 VectorType *VecTy) const;
1669
1670 /// Flags describing the kind of vector reduction.
1672 ReductionFlags() = default;
1673 bool IsMaxOp =
1674 false; ///< If the op is a min/max kind, true if it's a max operation.
1675 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1676 bool NoNaN =
1677 false; ///< If op is an fp min/max, whether NaNs may be present.
1678 };
1679
1680 /// \returns True if the target prefers fixed width vectorization if the
1681 /// loop vectorizer's cost-model assigns an equal cost to the fixed and
1682 /// scalable version of the vectorized loop.
1684
1685 /// \returns True if the target prefers reductions in loop.
1686 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1687 ReductionFlags Flags) const;
1688
1689 /// \returns True if the target prefers the reduction's select to be kept in the loop
1690 /// when tail folding. i.e.
1691 /// loop:
1692 /// p = phi (0, s)
1693 /// a = add (p, x)
1694 /// s = select (mask, a, p)
1695 /// vecreduce.add(s)
1696 ///
1697 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1698 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1699 /// by the target, this can lead to cleaner code generation.
1700 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1701 ReductionFlags Flags) const;
1702
1703 /// Return true if the loop vectorizer should consider vectorizing an
1704 /// otherwise scalar epilogue loop.
1705 bool preferEpilogueVectorization() const;
1706
1707 /// \returns True if the target wants to expand the given reduction intrinsic
1708 /// into a shuffle sequence.
1709 bool shouldExpandReduction(const IntrinsicInst *II) const;
1710
1712
1713 /// \returns The shuffle sequence pattern used to expand the given reduction
1714 /// intrinsic.
1717
1718 /// \returns the size cost of rematerializing a GlobalValue address relative
1719 /// to a stack reload.
1720 unsigned getGISelRematGlobalCost() const;
1721
1722 /// \returns the lower bound of a trip count to decide on vectorization
1723 /// while tail-folding.
1724 unsigned getMinTripCountTailFoldingThreshold() const;
1725
1726 /// \returns True if the target supports scalable vectors.
1727 bool supportsScalableVectors() const;
1728
1729 /// \return true when scalable vectorization is preferred.
1730 bool enableScalableVectorization() const;
1731
1732 /// \name Vector Predication Information
1733 /// @{
1734 /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1735 /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1736 /// Reference - "Vector Predication Intrinsics").
1737 /// Use of %evl is discouraged when that is not the case.
1738 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1739 Align Alignment) const;
1740
1743 // keep the predicating parameter
1745 // where legal, discard the predicate parameter
1747 // transform into something else that is also predicating
1748 Convert = 2
1750
1751 // How to transform the EVL parameter.
1752 // Legal: keep the EVL parameter as it is.
1753 // Discard: Ignore the EVL parameter where it is safe to do so.
1754 // Convert: Fold the EVL into the mask parameter.
1756
1757 // How to transform the operator.
1758 // Legal: The target supports this operator.
1759 // Convert: Convert this to a non-VP operation.
1760 // The 'Discard' strategy is invalid.
1762
1763 bool shouldDoNothing() const {
1764 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1765 }
1768 };
1769
1770 /// \returns How the target needs this vector-predicated operation to be
1771 /// transformed.
1773 /// @}
1774
1775 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1776 /// state.
1777 ///
1778 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1779 /// node containing a jump table in a format suitable for the target, so it
1780 /// needs to know what format of jump table it can legally use.
1781 ///
1782 /// For non-Arm targets, this function isn't used. It defaults to returning
1783 /// false, but it shouldn't matter what it returns anyway.
1784 bool hasArmWideBranch(bool Thumb) const;
1785
1786 /// \return The maximum number of function arguments the target supports.
1787 unsigned getMaxNumArgs() const;
1788
1789 /// @}
1790
1791private:
1792 /// The abstract base class used to type erase specific TTI
1793 /// implementations.
1794 class Concept;
1795
1796 /// The template model for the base class which wraps a concrete
1797 /// implementation in a type erased interface.
1798 template <typename T> class Model;
1799
1800 std::unique_ptr<Concept> TTIImpl;
1801};
1802
1804public:
1805 virtual ~Concept() = 0;
1806 virtual const DataLayout &getDataLayout() const = 0;
1807 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1809 Type *AccessType,
1811 virtual InstructionCost
1813 const TTI::PointersChainInfo &Info, Type *AccessTy,
1815 virtual unsigned getInliningThresholdMultiplier() const = 0;
1817 virtual unsigned
1819 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1820 virtual int getInlinerVectorBonusPercent() const = 0;
1821 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1822 const AllocaInst *AI) const = 0;
1825 virtual unsigned
1827 ProfileSummaryInfo *PSI,
1828 BlockFrequencyInfo *BFI) = 0;
1834 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1835 virtual bool isSourceOfDivergence(const Value *V) = 0;
1836 virtual bool isAlwaysUniform(const Value *V) = 0;
1837 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1838 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1839 virtual unsigned getFlatAddressSpace() = 0;
1841 Intrinsic::ID IID) const = 0;
1842 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1843 virtual bool
1845 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1846 virtual bool isSingleThreaded() const = 0;
1847 virtual std::pair<const Value *, unsigned>
1848 getPredicatedAddrSpace(const Value *V) const = 0;
1850 Value *OldV,
1851 Value *NewV) const = 0;
1852 virtual bool isLoweredToCall(const Function *F) = 0;
1855 OptimizationRemarkEmitter *ORE) = 0;
1857 PeelingPreferences &PP) = 0;
1859 AssumptionCache &AC,
1860 TargetLibraryInfo *LibInfo,
1861 HardwareLoopInfo &HWLoopInfo) = 0;
1863 virtual TailFoldingStyle
1864 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1865 virtual std::optional<Instruction *> instCombineIntrinsic(
1866 InstCombiner &IC, IntrinsicInst &II) = 0;
1867 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1868 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1869 KnownBits & Known, bool &KnownBitsComputed) = 0;
1870 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1871 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1872 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1873 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1874 SimplifyAndSetOp) = 0;
1875 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1876 virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
1877 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1878 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1879 int64_t BaseOffset, bool HasBaseReg,
1880 int64_t Scale, unsigned AddrSpace,
1881 Instruction *I,
1882 int64_t ScalableOffset) = 0;
1884 const TargetTransformInfo::LSRCost &C2) = 0;
1885 virtual bool isNumRegsMajorCostOfLSR() = 0;
1888 virtual bool canMacroFuseCmp() = 0;
1889 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1891 TargetLibraryInfo *LibInfo) = 0;
1892 virtual AddressingModeKind
1894 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1895 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1896 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1897 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1898 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1899 ElementCount NumElements) const = 0;
1900 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1901 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1903 Align Alignment) = 0;
1905 Align Alignment) = 0;
1906 virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) = 0;
1907 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) = 0;
1908 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0;
1909 virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) = 0;
1910 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1911 unsigned Opcode1,
1912 const SmallBitVector &OpcodeMask) const = 0;
1913 virtual bool enableOrderedReductions() = 0;
1914 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1915 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1918 StackOffset BaseOffset,
1919 bool HasBaseReg, int64_t Scale,
1920 unsigned AddrSpace) = 0;
1921 virtual bool LSRWithInstrQueries() = 0;
1922 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1924 virtual bool useAA() = 0;
1925 virtual bool isTypeLegal(Type *Ty) = 0;
1926 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1927 virtual bool shouldBuildLookupTables() = 0;
1929 virtual bool shouldBuildRelLookupTables() = 0;
1930 virtual bool useColdCCForColdCall(Function &F) = 0;
1932 const APInt &DemandedElts,
1933 bool Insert, bool Extract,
1935 virtual InstructionCost
1937 ArrayRef<Type *> Tys,
1940 virtual bool supportsTailCalls() = 0;
1941 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1942 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1944 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1945 virtual bool enableSelectOptimize() = 0;
1951 unsigned BitWidth,
1952 unsigned AddressSpace,
1953 Align Alignment,
1954 unsigned *Fast) = 0;
1955 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1956 virtual bool haveFastSqrt(Type *Ty) = 0;
1958 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1960 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1961 const APInt &Imm, Type *Ty) = 0;
1962 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1964 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1965 const APInt &Imm, Type *Ty,
1967 Instruction *Inst = nullptr) = 0;
1969 const APInt &Imm, Type *Ty,
1972 const Function &Fn) const = 0;
1973 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1974 virtual bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const = 0;
1975 virtual unsigned getRegisterClassForType(bool Vector,
1976 Type *Ty = nullptr) const = 0;
1977 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1979 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1980 virtual std::optional<unsigned> getMaxVScale() const = 0;
1981 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1982 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1983 virtual bool
1985 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1986 bool IsScalable) const = 0;
1987 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1988 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1989 Type *ScalarValTy) const = 0;
1991 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1992 virtual unsigned getCacheLineSize() const = 0;
1993 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1994 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1995 const = 0;
1996 virtual std::optional<unsigned> getMinPageSize() const = 0;
1997
1998 /// \return How much before a load we should place the prefetch
1999 /// instruction. This is currently measured in number of
2000 /// instructions.
2001 virtual unsigned getPrefetchDistance() const = 0;
2002
2003 /// \return Some HW prefetchers can handle accesses up to a certain
2004 /// constant stride. This is the minimum stride in bytes where it
2005 /// makes sense to start adding SW prefetches. The default is 1,
2006 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
2007 /// even below the HW prefetcher limit, and the arguments provided are
2008 /// meant to serve as a basis for deciding this for a particular loop.
2009 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2010 unsigned NumStridedMemAccesses,
2011 unsigned NumPrefetches,
2012 bool HasCall) const = 0;
2013
2014 /// \return The maximum number of iterations to prefetch ahead. If
2015 /// the required number of iterations is more than this number, no
2016 /// prefetching is performed.
2017 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
2018
2019 /// \return True if prefetching should also be done for writes.
2020 virtual bool enableWritePrefetching() const = 0;
2021
2022 /// \return True if the target wants to issue a prefetch in address space \p AS.
2023 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
2024
2025 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
2027 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2028 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2029 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
2031 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2032 const SmallBitVector &OpcodeMask,
2034
2035 virtual InstructionCost
2038 ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
2039 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
2040 Type *Src, CastContextHint CCH,
2042 const Instruction *I) = 0;
2043 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2044 VectorType *VecTy,
2045 unsigned Index) = 0;
2046 virtual InstructionCost getCFInstrCost(unsigned Opcode,
2048 const Instruction *I = nullptr) = 0;
2049 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
2050 Type *CondTy,
2051 CmpInst::Predicate VecPred,
2053 const Instruction *I) = 0;
2054 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2056 unsigned Index, Value *Op0,
2057 Value *Op1) = 0;
2060 unsigned Index) = 0;
2061
2062 virtual InstructionCost
2063 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2064 const APInt &DemandedDstElts,
2066
2067 virtual InstructionCost
2068 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2070 OperandValueInfo OpInfo, const Instruction *I) = 0;
2071 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2072 Align Alignment,
2073 unsigned AddressSpace,
2075 const Instruction *I) = 0;
2076 virtual InstructionCost
2077 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2078 unsigned AddressSpace,
2080 virtual InstructionCost
2081 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2082 bool VariableMask, Align Alignment,
2084 const Instruction *I = nullptr) = 0;
2085 virtual InstructionCost
2086 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2087 bool VariableMask, Align Alignment,
2089 const Instruction *I = nullptr) = 0;
2090
2092 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2093 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2094 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2095 virtual InstructionCost
2097 std::optional<FastMathFlags> FMF,
2099 virtual InstructionCost
2103 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2104 FastMathFlags FMF,
2107 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2109 virtual InstructionCost
2113 ArrayRef<Type *> Tys,
2115 virtual unsigned getNumberOfParts(Type *Tp) = 0;
2116 virtual InstructionCost
2118 virtual InstructionCost
2121 MemIntrinsicInfo &Info) = 0;
2122 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2124 Type *ExpectedType) = 0;
2126 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2127 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
2128 std::optional<uint32_t> AtomicElementSize) const = 0;
2129
2131 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2132 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2133 Align SrcAlign, Align DestAlign,
2134 std::optional<uint32_t> AtomicCpySize) const = 0;
2135 virtual bool areInlineCompatible(const Function *Caller,
2136 const Function *Callee) const = 0;
2137 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2138 unsigned DefaultCallPenalty) const = 0;
2139 virtual bool areTypesABICompatible(const Function *Caller,
2140 const Function *Callee,
2141 const ArrayRef<Type *> &Types) const = 0;
2142 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2143 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2144 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2145 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2146 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2147 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2148 Align Alignment,
2149 unsigned AddrSpace) const = 0;
2150 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2151 Align Alignment,
2152 unsigned AddrSpace) const = 0;
2154 ElementCount VF) const = 0;
2155 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2156 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2157 unsigned ChainSizeInBytes,
2158 VectorType *VecTy) const = 0;
2159 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2160 unsigned ChainSizeInBytes,
2161 VectorType *VecTy) const = 0;
2162 virtual bool preferFixedOverScalableIfEqualCost() const = 0;
2163 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2164 ReductionFlags) const = 0;
2165 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2166 ReductionFlags) const = 0;
2167 virtual bool preferEpilogueVectorization() const = 0;
2168
2169 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2170 virtual ReductionShuffle
2172 virtual unsigned getGISelRematGlobalCost() const = 0;
2173 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2174 virtual bool enableScalableVectorization() const = 0;
2175 virtual bool supportsScalableVectors() const = 0;
2176 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2177 Align Alignment) const = 0;
2178 virtual VPLegalization
2180 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2181 virtual unsigned getMaxNumArgs() const = 0;
2182};
2183
2184template <typename T>
2185class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2186 T Impl;
2187
2188public:
2189 Model(T Impl) : Impl(std::move(Impl)) {}
2190 ~Model() override = default;
2191
2192 const DataLayout &getDataLayout() const override {
2193 return Impl.getDataLayout();
2194 }
2195
2196 InstructionCost
2197 getGEPCost(Type *PointeeType, const Value *Ptr,
2198 ArrayRef<const Value *> Operands, Type *AccessType,
2200 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2201 }
2202 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2203 const Value *Base,
2204 const PointersChainInfo &Info,
2205 Type *AccessTy,
2206 TargetCostKind CostKind) override {
2207 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2208 }
2209 unsigned getInliningThresholdMultiplier() const override {
2210 return Impl.getInliningThresholdMultiplier();
2211 }
2212 unsigned adjustInliningThreshold(const CallBase *CB) override {
2213 return Impl.adjustInliningThreshold(CB);
2214 }
2215 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2216 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2217 }
2218 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2219 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2220 }
2221 int getInlinerVectorBonusPercent() const override {
2222 return Impl.getInlinerVectorBonusPercent();
2223 }
2224 unsigned getCallerAllocaCost(const CallBase *CB,
2225 const AllocaInst *AI) const override {
2226 return Impl.getCallerAllocaCost(CB, AI);
2227 }
2228 InstructionCost getMemcpyCost(const Instruction *I) override {
2229 return Impl.getMemcpyCost(I);
2230 }
2231
2232 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2233 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2234 }
2235
2236 InstructionCost getInstructionCost(const User *U,
2237 ArrayRef<const Value *> Operands,
2238 TargetCostKind CostKind) override {
2239 return Impl.getInstructionCost(U, Operands, CostKind);
2240 }
2241 BranchProbability getPredictableBranchThreshold() override {
2242 return Impl.getPredictableBranchThreshold();
2243 }
2244 InstructionCost getBranchMispredictPenalty() override {
2245 return Impl.getBranchMispredictPenalty();
2246 }
2247 bool hasBranchDivergence(const Function *F = nullptr) override {
2248 return Impl.hasBranchDivergence(F);
2249 }
2250 bool isSourceOfDivergence(const Value *V) override {
2251 return Impl.isSourceOfDivergence(V);
2252 }
2253
2254 bool isAlwaysUniform(const Value *V) override {
2255 return Impl.isAlwaysUniform(V);
2256 }
2257
2258 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2259 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2260 }
2261
2262 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2263 return Impl.addrspacesMayAlias(AS0, AS1);
2264 }
2265
2266 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2267
2268 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2269 Intrinsic::ID IID) const override {
2270 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2271 }
2272
2273 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2274 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2275 }
2276
2277 bool
2278 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2279 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2280 }
2281
2282 unsigned getAssumedAddrSpace(const Value *V) const override {
2283 return Impl.getAssumedAddrSpace(V);
2284 }
2285
2286 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2287
2288 std::pair<const Value *, unsigned>
2289 getPredicatedAddrSpace(const Value *V) const override {
2290 return Impl.getPredicatedAddrSpace(V);
2291 }
2292
2293 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2294 Value *NewV) const override {
2295 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2296 }
2297
2298 bool isLoweredToCall(const Function *F) override {
2299 return Impl.isLoweredToCall(F);
2300 }
2301 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2302 UnrollingPreferences &UP,
2303 OptimizationRemarkEmitter *ORE) override {
2304 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2305 }
2306 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2307 PeelingPreferences &PP) override {
2308 return Impl.getPeelingPreferences(L, SE, PP);
2309 }
2310 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2311 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2312 HardwareLoopInfo &HWLoopInfo) override {
2313 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2314 }
2315 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2316 return Impl.preferPredicateOverEpilogue(TFI);
2317 }
2319 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2320 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2321 }
2322 std::optional<Instruction *>
2323 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2324 return Impl.instCombineIntrinsic(IC, II);
2325 }
2326 std::optional<Value *>
2327 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2328 APInt DemandedMask, KnownBits &Known,
2329 bool &KnownBitsComputed) override {
2330 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2331 KnownBitsComputed);
2332 }
2333 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2334 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2335 APInt &UndefElts2, APInt &UndefElts3,
2336 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2337 SimplifyAndSetOp) override {
2338 return Impl.simplifyDemandedVectorEltsIntrinsic(
2339 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2340 SimplifyAndSetOp);
2341 }
2342 bool isLegalAddImmediate(int64_t Imm) override {
2343 return Impl.isLegalAddImmediate(Imm);
2344 }
2345 bool isLegalAddScalableImmediate(int64_t Imm) override {
2346 return Impl.isLegalAddScalableImmediate(Imm);
2347 }
2348 bool isLegalICmpImmediate(int64_t Imm) override {
2349 return Impl.isLegalICmpImmediate(Imm);
2350 }
/// Override that delegates to Impl.isLegalAddressingMode, forwarding all
/// eight arguments in declaration order.
2351 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2352 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2353 Instruction *I, int64_t ScalableOffset) override {
2354 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2355 AddrSpace, I, ScalableOffset);
2356 }
/// Override that delegates to Impl.isLSRCostLess(C1, C2).
2357 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2358 const TargetTransformInfo::LSRCost &C2) override {
2359 return Impl.isLSRCostLess(C1, C2);
2360 }
/// Override that delegates to Impl.isNumRegsMajorCostOfLSR().
2361 bool isNumRegsMajorCostOfLSR() override {
2362 return Impl.isNumRegsMajorCostOfLSR();
2363 }
/// Const override that delegates to
/// Impl.shouldDropLSRSolutionIfLessProfitable().
2364 bool shouldDropLSRSolutionIfLessProfitable() const override {
2365 return Impl.shouldDropLSRSolutionIfLessProfitable();
2366 }
/// Override that delegates to Impl.isProfitableLSRChainElement(I).
2367 bool isProfitableLSRChainElement(Instruction *I) override {
2368 return Impl.isProfitableLSRChainElement(I);
2369 }
/// Override that delegates to Impl.canMacroFuseCmp().
2370 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
/// Override that delegates to Impl.canSaveCmp, forwarding the loop, the
/// BranchInst** slot and the analysis pointers unchanged.
2371 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2372 DominatorTree *DT, AssumptionCache *AC,
2373 TargetLibraryInfo *LibInfo) override {
2374 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2375 }
/// Const override that delegates to Impl.getPreferredAddressingMode(L, SE).
/// NOTE(review): the listing's own numbering skips 2376 just above and this
/// declaration starts at the function name, so the return-type line is
/// presumably missing from the listing -- confirm against the upstream header.
2377 getPreferredAddressingMode(const Loop *L,
2378 ScalarEvolution *SE) const override {
2379 return Impl.getPreferredAddressingMode(L, SE);
2380 }
/// Override that delegates to Impl.isLegalMaskedStore(DataType, Alignment).
2381 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2382 return Impl.isLegalMaskedStore(DataType, Alignment);
2383 }
/// Override that delegates to Impl.isLegalMaskedLoad(DataType, Alignment).
2384 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2385 return Impl.isLegalMaskedLoad(DataType, Alignment);
2386 }
/// Override that delegates to Impl.isLegalNTStore(DataType, Alignment).
2387 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2388 return Impl.isLegalNTStore(DataType, Alignment);
2389 }
/// Override that delegates to Impl.isLegalNTLoad(DataType, Alignment).
2390 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2391 return Impl.isLegalNTLoad(DataType, Alignment);
2392 }
/// Const override that delegates to
/// Impl.isLegalBroadcastLoad(ElementTy, NumElements).
2393 bool isLegalBroadcastLoad(Type *ElementTy,
2394 ElementCount NumElements) const override {
2395 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2396 }
/// Override that delegates to Impl.isLegalMaskedScatter(DataType, Alignment).
2397 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2398 return Impl.isLegalMaskedScatter(DataType, Alignment);
2399 }
/// Override that delegates to Impl.isLegalMaskedGather(DataType, Alignment).
2400 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2401 return Impl.isLegalMaskedGather(DataType, Alignment);
2402 }
/// Override that delegates to
/// Impl.forceScalarizeMaskedGather(DataType, Alignment).
2403 bool forceScalarizeMaskedGather(VectorType *DataType,
2404 Align Alignment) override {
2405 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2406 }
/// Override that delegates to
/// Impl.forceScalarizeMaskedScatter(DataType, Alignment).
2407 bool forceScalarizeMaskedScatter(VectorType *DataType,
2408 Align Alignment) override {
2409 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2410 }
/// Override that delegates to
/// Impl.isLegalMaskedCompressStore(DataType, Alignment).
2411 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
2412 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2413 }
/// Override that delegates to
/// Impl.isLegalMaskedExpandLoad(DataType, Alignment).
2414 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
2415 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2416 }
/// Override that delegates to
/// Impl.isLegalStridedLoadStore(DataType, Alignment).
2417 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
2418 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2419 }
/// Override that delegates to
/// Impl.isLegalMaskedVectorHistogram(AddrType, DataType).
2420 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
2421 return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
2422 }
/// Const override that delegates to
/// Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask).
2423 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2424 const SmallBitVector &OpcodeMask) const override {
2425 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2426 }
/// Override that delegates to Impl.enableOrderedReductions().
2427 bool enableOrderedReductions() override {
2428 return Impl.enableOrderedReductions();
2429 }
/// Override that delegates to Impl.hasDivRemOp(DataType, IsSigned).
2430 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2431 return Impl.hasDivRemOp(DataType, IsSigned);
2432 }
/// Override that delegates to Impl.hasVolatileVariant(I, AddrSpace).
2433 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2434 return Impl.hasVolatileVariant(I, AddrSpace);
2435 }
/// Override that delegates to Impl.prefersVectorizedAddressing().
2436 bool prefersVectorizedAddressing() override {
2437 return Impl.prefersVectorizedAddressing();
2438 }
/// Override that delegates to Impl.getScalingFactorCost and returns its
/// InstructionCost. BaseOffset is a StackOffset here.
2439 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2440 StackOffset BaseOffset, bool HasBaseReg,
2441 int64_t Scale,
2442 unsigned AddrSpace) override {
2443 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2444 AddrSpace);
2445 }
/// Override that delegates to Impl.LSRWithInstrQueries().
2446 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
/// Override that delegates to Impl.isTruncateFree(Ty1, Ty2).
2447 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2448 return Impl.isTruncateFree(Ty1, Ty2);
2449 }
/// Override that delegates to Impl.isProfitableToHoist(I).
2450 bool isProfitableToHoist(Instruction *I) override {
2451 return Impl.isProfitableToHoist(I);
2452 }
/// Override that delegates to Impl.useAA().
2453 bool useAA() override { return Impl.useAA(); }
/// Override that delegates to Impl.isTypeLegal(Ty).
2454 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
/// Override that delegates to Impl.getRegUsageForType(Ty).
2455 unsigned getRegUsageForType(Type *Ty) override {
2456 return Impl.getRegUsageForType(Ty);
2457 }
/// Override that delegates to Impl.shouldBuildLookupTables().
2458 bool shouldBuildLookupTables() override {
2459 return Impl.shouldBuildLookupTables();
2460 }
/// Override that delegates to Impl.shouldBuildLookupTablesForConstant(C).
2461 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2462 return Impl.shouldBuildLookupTablesForConstant(C);
2463 }
/// Override that delegates to Impl.shouldBuildRelLookupTables().
2464 bool shouldBuildRelLookupTables() override {
2465 return Impl.shouldBuildRelLookupTables();
2466 }
/// Override that delegates to Impl.useColdCCForColdCall(F).
2467 bool useColdCCForColdCall(Function &F) override {
2468 return Impl.useColdCCForColdCall(F);
2469 }
2470
/// Override that delegates to Impl.getScalarizationOverhead, forwarding the
/// vector type, demanded-element mask, Insert/Extract flags and CostKind.
2471 InstructionCost getScalarizationOverhead(VectorType *Ty,
2472 const APInt &DemandedElts,
2473 bool Insert, bool Extract,
2474 TargetCostKind CostKind) override {
2475 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2476 CostKind);
2477 }
/// Override that delegates to
/// Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind).
2478 InstructionCost
2479 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2480 ArrayRef<Type *> Tys,
2481 TargetCostKind CostKind) override {
2482 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2483 }
2484
/// Override that delegates to Impl.supportsEfficientVectorElementLoadStore().
2485 bool supportsEfficientVectorElementLoadStore() override {
2486 return Impl.supportsEfficientVectorElementLoadStore();
2487 }
2488
/// Override that delegates to Impl.supportsTailCalls().
2489 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
/// Override that delegates to Impl.supportsTailCallFor(CB).
2490 bool supportsTailCallFor(const CallBase *CB) override {
2491 return Impl.supportsTailCallFor(CB);
2492 }
2493
/// Override that delegates to
/// Impl.enableAggressiveInterleaving(LoopHasReductions).
2494 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2495 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2496 }
/// Const override that delegates to
/// Impl.enableMemCmpExpansion(OptSize, IsZeroCmp) and returns its
/// MemCmpExpansionOptions.
2497 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2498 bool IsZeroCmp) const override {
2499 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2500 }
/// Override that delegates to Impl.enableSelectOptimize().
2501 bool enableSelectOptimize() override {
2502 return Impl.enableSelectOptimize();
2503 }
/// Override that delegates to Impl.shouldTreatInstructionLikeSelect(I).
2504 bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
2505 return Impl.shouldTreatInstructionLikeSelect(I);
2506 }
/// Override that delegates to Impl.enableInterleavedAccessVectorization().
2507 bool enableInterleavedAccessVectorization() override {
2508 return Impl.enableInterleavedAccessVectorization();
2509 }
/// Override that delegates to
/// Impl.enableMaskedInterleavedAccessVectorization().
2510 bool enableMaskedInterleavedAccessVectorization() override {
2511 return Impl.enableMaskedInterleavedAccessVectorization();
2512 }
/// Override that delegates to Impl.isFPVectorizationPotentiallyUnsafe().
2513 bool isFPVectorizationPotentiallyUnsafe() override {
2514 return Impl.isFPVectorizationPotentiallyUnsafe();
2515 }
/// Override that delegates to Impl.allowsMisalignedMemoryAccesses. The
/// Fast pointer is handed on as received, with no null check here.
2516 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2517 unsigned AddressSpace, Align Alignment,
2518 unsigned *Fast) override {
2519 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2520 Alignment, Fast);
2521 }
/// Override that delegates to Impl.getPopcntSupport(IntTyWidthInBit).
2522 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2523 return Impl.getPopcntSupport(IntTyWidthInBit);
2524 }
/// Override that delegates to Impl.haveFastSqrt(Ty).
2525 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2526
/// Override that delegates to Impl.isExpensiveToSpeculativelyExecute(I).
2527 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2528 return Impl.isExpensiveToSpeculativelyExecute(I);
2529 }
2530
/// Override that delegates to Impl.isFCmpOrdCheaperThanFCmpZero(Ty).
2531 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2532 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2533 }
2534
/// Override that delegates to Impl.getFPOpCost(Ty).
2535 InstructionCost getFPOpCost(Type *Ty) override {
2536 return Impl.getFPOpCost(Ty);
2537 }
2538
/// Override that delegates to Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty).
2539 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2540 const APInt &Imm, Type *Ty) override {
2541 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2542 }
/// Override that delegates to Impl.getIntImmCost(Imm, Ty, CostKind).
2543 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2544 TargetCostKind CostKind) override {
2545 return Impl.getIntImmCost(Imm, Ty, CostKind);
2546 }
/// Override that delegates to Impl.getIntImmCostInst; Inst defaults to
/// nullptr here.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2549 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2547 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2548 const APInt &Imm, Type *Ty,
2550 Instruction *Inst = nullptr) override {
2551 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2552 }
/// Override that delegates to
/// Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind).
2553 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2554 const APInt &Imm, Type *Ty,
2555 TargetCostKind CostKind) override {
2556 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2557 }
/// Const override that delegates to
/// Impl.preferToKeepConstantsAttached(Inst, Fn).
2558 bool preferToKeepConstantsAttached(const Instruction &Inst,
2559 const Function &Fn) const override {
2560 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2561 }
/// Const override that delegates to Impl.getNumberOfRegisters(ClassID).
2562 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2563 return Impl.getNumberOfRegisters(ClassID);
2564 }
/// Const override that delegates to Impl.hasConditionalLoadStoreForType(Ty);
/// Ty defaults to nullptr here.
2565 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
2566 return Impl.hasConditionalLoadStoreForType(Ty);
2567 }
/// Const override that delegates to Impl.getRegisterClassForType(Vector, Ty);
/// Ty defaults to nullptr here.
2568 unsigned getRegisterClassForType(bool Vector,
2569 Type *Ty = nullptr) const override {
2570 return Impl.getRegisterClassForType(Vector, Ty);
2571 }
/// Const override that delegates to Impl.getRegisterClassName(ClassID) and
/// returns the const char * it yields.
2572 const char *getRegisterClassName(unsigned ClassID) const override {
2573 return Impl.getRegisterClassName(ClassID);
2574 }
/// Const override that delegates to Impl.getRegisterBitWidth(K).
2575 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2576 return Impl.getRegisterBitWidth(K);
2577 }
/// Const override that delegates to Impl.getMinVectorRegisterBitWidth().
2578 unsigned getMinVectorRegisterBitWidth() const override {
2579 return Impl.getMinVectorRegisterBitWidth();
2580 }
/// Const override that delegates to Impl.getMaxVScale() and returns its
/// std::optional<unsigned> unchanged.
2581 std::optional<unsigned> getMaxVScale() const override {
2582 return Impl.getMaxVScale();
2583 }
/// Const override that delegates to Impl.getVScaleForTuning() and returns
/// its std::optional<unsigned> unchanged.
2584 std::optional<unsigned> getVScaleForTuning() const override {
2585 return Impl.getVScaleForTuning();
2586 }
/// Const override that delegates to Impl.isVScaleKnownToBeAPowerOfTwo().
2587 bool isVScaleKnownToBeAPowerOfTwo() const override {
2588 return Impl.isVScaleKnownToBeAPowerOfTwo();
2589 }
/// Const override that delegates to Impl.shouldMaximizeVectorBandwidth(K).
2590 bool shouldMaximizeVectorBandwidth(
2591 TargetTransformInfo::RegisterKind K) const override {
2592 return Impl.shouldMaximizeVectorBandwidth(K);
2593 }
/// Const override that delegates to Impl.getMinimumVF(ElemWidth, IsScalable).
2594 ElementCount getMinimumVF(unsigned ElemWidth,
2595 bool IsScalable) const override {
2596 return Impl.getMinimumVF(ElemWidth, IsScalable);
2597 }
/// Const override that delegates to Impl.getMaximumVF(ElemWidth, Opcode).
2598 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2599 return Impl.getMaximumVF(ElemWidth, Opcode);
2600 }
/// Const override that delegates to
/// Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy).
2601 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2602 Type *ScalarValTy) const override {
2603 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2604 }
/// Override that delegates to Impl.shouldConsiderAddressTypePromotion;
/// AllowPromotionWithoutCommonHeader is passed on by reference.
2605 bool shouldConsiderAddressTypePromotion(
2606 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2607 return Impl.shouldConsiderAddressTypePromotion(
2608 I, AllowPromotionWithoutCommonHeader);
2609 }
/// Const override that delegates to Impl.getCacheLineSize().
2610 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
/// Const override that delegates to Impl.getCacheSize(Level).
2611 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2612 return Impl.getCacheSize(Level);
2613 }
/// Const override that delegates to Impl.getCacheAssociativity(Level).
2614 std::optional<unsigned>
2615 getCacheAssociativity(CacheLevel Level) const override {
2616 return Impl.getCacheAssociativity(Level);
2617 }
2618
/// Const override that delegates to Impl.getMinPageSize().
2619 std::optional<unsigned> getMinPageSize() const override {
2620 return Impl.getMinPageSize();
2621 }
2622
2623 /// Return the preferred prefetch distance in terms of instructions.
2624 ///
/// The value comes directly from Impl.getPrefetchDistance().
2625 unsigned getPrefetchDistance() const override {
2626 return Impl.getPrefetchDistance();
2627 }
2628
2629 /// Return the minimum stride necessary to trigger software
2630 /// prefetching.
2631 ///
/// All four arguments are forwarded unchanged to Impl.getMinPrefetchStride.
2632 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2633 unsigned NumStridedMemAccesses,
2634 unsigned NumPrefetches,
2635 bool HasCall) const override {
2636 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2637 NumPrefetches, HasCall);
2638 }
2639
2640 /// Return the maximum prefetch distance in terms of loop
2641 /// iterations.
2642 ///
/// The value comes directly from Impl.getMaxPrefetchIterationsAhead().
2643 unsigned getMaxPrefetchIterationsAhead() const override {
2644 return Impl.getMaxPrefetchIterationsAhead();
2645 }
2646
2647 /// \return True if prefetching should also be done for writes.
/// The answer comes directly from Impl.enableWritePrefetching().
2648 bool enableWritePrefetching() const override {
2649 return Impl.enableWritePrefetching();
2650 }
2651
2652 /// \return whether the target wants to issue a prefetch in address space \p AS.
/// The answer comes directly from Impl.shouldPrefetchAddressSpace(AS).
2653 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2654 return Impl.shouldPrefetchAddressSpace(AS);
2655 }
2656
/// Override that delegates to Impl.getMaxInterleaveFactor(VF).
2657 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2658 return Impl.getMaxInterleaveFactor(VF);
2659 }
/// Override that delegates to Impl.getEstimatedNumberOfCaseClusters;
/// JTSize is passed on by reference.
2660 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2661 unsigned &JTSize,
2662 ProfileSummaryInfo *PSI,
2663 BlockFrequencyInfo *BFI) override {
2664 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2665 }
/// Override that delegates to Impl.getArithmeticInstrCost, forwarding every
/// argument in declaration order; CxtI defaults to nullptr here.
2666 InstructionCost getArithmeticInstrCost(
2667 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2668 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2669 ArrayRef<const Value *> Args,
2670 const Instruction *CxtI = nullptr) override {
2671 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2672 Args, CxtI);
2673 }
/// Const override that delegates to
/// Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind).
2674 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2675 unsigned Opcode1,
2676 const SmallBitVector &OpcodeMask,
2677 TTI::TargetCostKind CostKind) const override {
2678 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2679 }
2680
/// Override that delegates to Impl.getShuffleCost.
/// NOTE(review): the body passes CostKind and Index, neither of which is
/// declared by a visible parameter, and the listing's own numbering skips
/// 2683 -- a parameter line is presumably missing from this listing; confirm
/// against the upstream header.
2681 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2682 ArrayRef<int> Mask,
2684 VectorType *SubTp,
2685 ArrayRef<const Value *> Args,
2686 const Instruction *CxtI) override {
2687 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
2688 CxtI);
2689 }
/// Override that delegates to Impl.getCastInstrCost.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2692 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2690 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2691 CastContextHint CCH,
2693 const Instruction *I) override {
2694 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2695 }
/// Override that delegates to
/// Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index).
2696 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2697 VectorType *VecTy,
2698 unsigned Index) override {
2699 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2700 }
/// Override that delegates to Impl.getCFInstrCost(Opcode, CostKind, I);
/// I defaults to nullptr here.
2701 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2702 const Instruction *I = nullptr) override {
2703 return Impl.getCFInstrCost(Opcode, CostKind, I);
2704 }
/// Override that delegates to Impl.getCmpSelInstrCost.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2707 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2705 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2706 CmpInst::Predicate VecPred,
2708 const Instruction *I) override {
2709 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2710 }
/// Opcode-based overload; delegates to the matching Impl.getVectorInstrCost.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2712 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2711 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2713 unsigned Index, Value *Op0,
2714 Value *Op1) override {
2715 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2716 }
/// Instruction-based overload; delegates to the matching
/// Impl.getVectorInstrCost.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2718 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2717 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2719 unsigned Index) override {
2720 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2721 }
/// Override that delegates to Impl.getReplicationShuffleCost, forwarding
/// EltTy, ReplicationFactor, VF, DemandedDstElts and CostKind.
2722 InstructionCost
2723 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2724 const APInt &DemandedDstElts,
2725 TTI::TargetCostKind CostKind) override {
2726 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2727 DemandedDstElts, CostKind);
2728 }
/// Override that delegates to Impl.getMemoryOpCost.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2731 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2729 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2730 unsigned AddressSpace,
2732 OperandValueInfo OpInfo,
2733 const Instruction *I) override {
2734 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2735 OpInfo, I);
2736 }
/// Override that delegates to Impl.getVPMemoryOpCost.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2739 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2737 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2738 unsigned AddressSpace,
2740 const Instruction *I) override {
2741 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2742 CostKind, I);
2743 }
/// Override that delegates to Impl.getMaskedMemoryOpCost(Opcode, Src,
/// Alignment, AddressSpace, CostKind).
2744 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2745 Align Alignment, unsigned AddressSpace,
2746 TTI::TargetCostKind CostKind) override {
2747 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2748 CostKind);
2749 }
/// Override that delegates to Impl.getGatherScatterOpCost; I defaults to
/// nullptr here.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2753 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2750 InstructionCost
2751 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2752 bool VariableMask, Align Alignment,
2754 const Instruction *I = nullptr) override {
2755 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2756 Alignment, CostKind, I);
2757 }
/// Override that delegates to Impl.getStridedMemoryOpCost; I defaults to
/// nullptr here.
/// NOTE(review): the body passes CostKind, which no visible parameter
/// declares, and the listing's own numbering skips 2761 -- a parameter line
/// is presumably missing from this listing; confirm against the upstream
/// header.
2758 InstructionCost
2759 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2760 bool VariableMask, Align Alignment,
2762 const Instruction *I = nullptr) override {
2763 return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
2764 Alignment, CostKind, I);
2765 }
/// Override that delegates to Impl.getInterleavedMemoryOpCost, forwarding
/// all nine arguments in declaration order.
2766 InstructionCost getInterleavedMemoryOpCost(
2767 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2768 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2769 bool UseMaskForCond, bool UseMaskForGaps) override {
2770 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2771 Alignment, AddressSpace, CostKind,
2772 UseMaskForCond, UseMaskForGaps);
2773 }
/// Override that delegates to
/// Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); FMF is an
/// optional FastMathFlags and is handed on as received.
2774 InstructionCost
2775 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2776 std::optional<FastMathFlags> FMF,
2777 TTI::TargetCostKind CostKind) override {
2778 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2779 }
/// Override that delegates to
/// Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind).
2780 InstructionCost
2781 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2782 TTI::TargetCostKind CostKind) override {
2783 return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
2784 }
/// Override that delegates to Impl.getExtendedReductionCost, forwarding
/// Opcode, IsUnsigned, ResTy, Ty, FMF and CostKind.
2785 InstructionCost
2786 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2787 VectorType *Ty, FastMathFlags FMF,
2788 TTI::TargetCostKind CostKind) override {
2789 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2790 CostKind);
2791 }
/// Override that delegates to
/// Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind).
2792 InstructionCost
2793 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2794 TTI::TargetCostKind CostKind) override {
2795 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2796 }
/// Override that delegates to Impl.getIntrinsicInstrCost(ICA, CostKind).
2797 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2798 TTI::TargetCostKind CostKind) override {
2799 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2800 }
/// Override that delegates to Impl.getCallInstrCost(F, RetTy, Tys, CostKind).
2801 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2802 ArrayRef<Type *> Tys,
2803 TTI::TargetCostKind CostKind) override {
2804 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2805 }
/// Override that delegates to Impl.getNumberOfParts(Tp).
2806 unsigned getNumberOfParts(Type *Tp) override {
2807 return Impl.getNumberOfParts(Tp);
2808 }
/// Override that delegates to Impl.getAddressComputationCost(Ty, SE, Ptr).
2809 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2810 const SCEV *Ptr) override {
2811 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2812 }
/// Override that delegates to Impl.getCostOfKeepingLiveOverCall(Tys).
2813 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2814 return Impl.getCostOfKeepingLiveOverCall(Tys);
2815 }
/// Override that delegates to Impl.getTgtMemIntrinsic(Inst, Info); Info is
/// passed on by reference.
2816 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2817 MemIntrinsicInfo &Info) override {
2818 return Impl.getTgtMemIntrinsic(Inst, Info);
2819 }
/// Const override that delegates to
/// Impl.getAtomicMemIntrinsicMaxElementSize().
2820 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2821 return Impl.getAtomicMemIntrinsicMaxElementSize();
2822 }
/// Override that delegates to
/// Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType) and returns
/// the Value * it yields.
2823 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2824 Type *ExpectedType) override {
2825 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2826 }
/// Const override that delegates to Impl.getMemcpyLoopLoweringType,
/// forwarding all seven arguments in declaration order.
2827 Type *getMemcpyLoopLoweringType(
2828 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2829 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
2830 std::optional<uint32_t> AtomicElementSize) const override {
2831 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2832 DestAddrSpace, SrcAlign, DestAlign,
2833 AtomicElementSize);
2834 }
/// Const override that delegates to Impl.getMemcpyLoopResidualLoweringType.
/// Returns nothing itself; OpsOut is passed on by non-const reference.
2835 void getMemcpyLoopResidualLoweringType(
2836 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2837 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2838 Align SrcAlign, Align DestAlign,
2839 std::optional<uint32_t> AtomicCpySize) const override {
2840 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2841 SrcAddrSpace, DestAddrSpace,
2842 SrcAlign, DestAlign, AtomicCpySize);
2843 }
/// Const override that delegates to Impl.areInlineCompatible(Caller, Callee).
2844 bool areInlineCompatible(const Function *Caller,
2845 const Function *Callee) const override {
2846 return Impl.areInlineCompatible(Caller, Callee);
2847 }
/// Const override that delegates to
/// Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty).
2848 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2849 unsigned DefaultCallPenalty) const override {
2850 return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
2851 }
/// Const override that delegates to
/// Impl.areTypesABICompatible(Caller, Callee, Types).
2852 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2853 const ArrayRef<Type *> &Types) const override {
2854 return Impl.areTypesABICompatible(Caller, Callee, Types);
2855 }
/// Const override that delegates to Impl.isIndexedLoadLegal. Unlike most
/// forwarders here, it appends an extra argument: the result of
/// getDataLayout().
2856 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2857 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2858 }
/// Const override that delegates to Impl.isIndexedStoreLegal. Unlike most
/// forwarders here, it appends an extra argument: the result of
/// getDataLayout().
2859 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2860 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2861 }
/// Const override that delegates to
/// Impl.getLoadStoreVecRegBitWidth(AddrSpace).
2862 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2863 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2864 }
/// Const override that delegates to Impl.isLegalToVectorizeLoad(LI).
2865 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2866 return Impl.isLegalToVectorizeLoad(LI);
2867 }
/// Const override that delegates to Impl.isLegalToVectorizeStore(SI).
2868 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2869 return Impl.isLegalToVectorizeStore(SI);
2870 }
/// Const override that delegates to
/// Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace).
2871 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2872 unsigned AddrSpace) const override {
2873 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2874 AddrSpace);
2875 }
/// Const override that delegates to
/// Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, AddrSpace).
2876 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2877 unsigned AddrSpace) const override {
2878 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2879 AddrSpace);
2880 }
/// Const override that delegates to
/// Impl.isLegalToVectorizeReduction(RdxDesc, VF).
2881 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2882 ElementCount VF) const override {
2883 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2884 }
/// Const override that delegates to
/// Impl.isElementTypeLegalForScalableVector(Ty).
2885 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2886 return Impl.isElementTypeLegalForScalableVector(Ty);
2887 }
/// Const override that delegates to
/// Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy).
2888 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2889 unsigned ChainSizeInBytes,
2890 VectorType *VecTy) const override {
2891 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2892 }
/// Const override that delegates to
/// Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy).
2893 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2894 unsigned ChainSizeInBytes,
2895 VectorType *VecTy) const override {
2896 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2897 }
/// Const override that delegates to
/// Impl.preferFixedOverScalableIfEqualCost().
2898 bool preferFixedOverScalableIfEqualCost() const override {
2899 return Impl.preferFixedOverScalableIfEqualCost();
2900 }
/// Const override that delegates to
/// Impl.preferInLoopReduction(Opcode, Ty, Flags).
2901 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2902 ReductionFlags Flags) const override {
2903 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2904 }
/// Const override that delegates to
/// Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags).
2905 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2906 ReductionFlags Flags) const override {
2907 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2908 }
/// Const override that delegates to Impl.preferEpilogueVectorization().
2909 bool preferEpilogueVectorization() const override {
2910 return Impl.preferEpilogueVectorization();
2911 }
2912
/// Const override that delegates to Impl.shouldExpandReduction(II).
2913 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2914 return Impl.shouldExpandReduction(II);
2915 }
2916
/// Const override that delegates to
/// Impl.getPreferredExpandedReductionShuffle(II).
/// NOTE(review): the listing's own numbering skips 2917 just above and this
/// declaration starts at the function name, so the return-type line is
/// presumably missing from the listing -- confirm against the upstream header.
2918 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
2919 return Impl.getPreferredExpandedReductionShuffle(II);
2920 }
2921
/// Const override that delegates to Impl.getGISelRematGlobalCost().
2922 unsigned getGISelRematGlobalCost() const override {
2923 return Impl.getGISelRematGlobalCost();
2924 }
2925
/// Const override that delegates to
/// Impl.getMinTripCountTailFoldingThreshold().
2926 unsigned getMinTripCountTailFoldingThreshold() const override {
2927 return Impl.getMinTripCountTailFoldingThreshold();
2928 }
2929
/// Const override that delegates to Impl.supportsScalableVectors().
2930 bool supportsScalableVectors() const override {
2931 return Impl.supportsScalableVectors();
2932 }
2933
/// Const override that delegates to Impl.enableScalableVectorization().
2934 bool enableScalableVectorization() const override {
2935 return Impl.enableScalableVectorization();
2936 }
2937
/// Const override that delegates to
/// Impl.hasActiveVectorLength(Opcode, DataType, Alignment).
2938 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2939 Align Alignment) const override {
2940 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2941 }
2942
/// Const override that delegates to Impl.getVPLegalizationStrategy(PI).
/// NOTE(review): the listing's own numbering skips 2943 just above and this
/// declaration starts at the function name, so the return-type line is
/// presumably missing from the listing -- confirm against the upstream header.
2944 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2945 return Impl.getVPLegalizationStrategy(PI);
2946 }
2947
/// Const override that delegates to Impl.hasArmWideBranch(Thumb).
2948 bool hasArmWideBranch(bool Thumb) const override {
2949 return Impl.hasArmWideBranch(Thumb);
2950 }
2951
/// Const override that delegates to Impl.getMaxNumArgs().
2952 unsigned getMaxNumArgs() const override {
2953 return Impl.getMaxNumArgs();
2954 }
2955};
2956
/// Templated constructor definition: initializes TTIImpl with a
/// heap-allocated Model<T> built from Impl.
/// NOTE(review): the listing's own numbering skips 2958, so the constructor
/// declarator line that should sit between the template header and the
/// member-initializer list is not shown here -- confirm against the upstream
/// header.
2957template <typename T>
2959 : TTIImpl(new Model<T>(Impl)) {}
2960
2961/// Analysis pass providing the \c TargetTransformInfo.
2962///
2963/// The core idea of the TargetIRAnalysis is to expose an interface through
2964/// which LLVM targets can analyze and provide information about the middle
2965/// end's target-independent IR. This supports use cases such as target-aware
2966/// cost modeling of IR constructs.
2967///
2968/// This is a function analysis because much of the cost modeling for targets
2969/// is done in a subtarget specific way and LLVM supports compiling different
2970/// functions targeting different subtargets in order to support runtime
2971/// dispatch according to the observed subtarget.
/// NOTE(review): the listing's own numbering skips 2974, 2980, 2989, 2991,
/// 2993, 2997, 3002 and 3005 inside this class. The declarator lines of the
/// copy/move constructors and assignment operators are therefore not shown
/// (only their initializer lists and bodies are), and neither are a few
/// other declarations -- confirm against the upstream header.
2972class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2973public:
2975
2976 /// Default construct a target IR analysis.
2977 ///
2978 /// This will use the module's datalayout to construct a baseline
2979 /// conservative TTI result.
2981
2982 /// Construct an IR analysis pass around a target-provided callback.
2983 ///
2984 /// The callback will be called with a particular function for which the TTI
2985 /// is needed and must return a TTI object for that function.
2986 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2987
2988 // Value semantics. We spell out the constructors for MSVC.
// Copy operations copy TTICallback; move operations std::move it.
2990 : TTICallback(Arg.TTICallback) {}
2992 : TTICallback(std::move(Arg.TTICallback)) {}
2994 TTICallback = RHS.TTICallback;
2995 return *this;
2996 }
2998 TTICallback = std::move(RHS.TTICallback);
2999 return *this;
3000 }
3001
3003
3004private:
3006 static AnalysisKey Key;
3007
3008 /// The callback used to produce a result.
3009 ///
3010 /// We use a completely opaque callback so that targets can provide whatever
3011 /// mechanism they desire for constructing the TTI for a given function.
3012 ///
3013 /// FIXME: Should we really use std::function? It's relatively inefficient.
3014 /// It might be possible to arrange for even stateful callbacks to outlive
3015 /// the analysis and thus use a function_ref which would be lighter weight.
3016 /// This may also be less error prone as the callback is likely to reference
3017 /// the external TargetMachine, and that reference needs to never dangle.
3018 std::function<Result(const Function &)> TTICallback;
3019
3020 /// Helper function used as the callback in the default constructor.
3021 static Result getDefaultTTI(const Function &F);
3022};
3023
3024/// Wrapper pass for TargetTransformInfo.
3025///
3026/// This pass can be constructed from a TTI object which it stores internally
3027/// and is queried by passes.
/// NOTE(review): the listing's own numbering skips 3028, 3041, 3043 and 3045.
/// The class-head line and the declarations that should follow the
/// constructor comment below are therefore not shown here -- confirm against
/// the upstream header.
/// Visible members: a TargetIRAnalysis (TIRA), an optional
/// TargetTransformInfo (TTI), a virtual anchor() and a static char ID.
3029 TargetIRAnalysis TIRA;
3030 std::optional<TargetTransformInfo> TTI;
3031
3032 virtual void anchor();
3033
3034public:
3035 static char ID;
3036
3037 /// We must provide a default constructor for the pass but it should
3038 /// never be used.
3039 ///
3040 /// Use the constructor below or call one of the creation routines.
3042
3044
3046};
3047
3048/// Create an analysis pass wrapper around a TTI object.
3049///
3050/// This analysis pass just holds the TTI instance and makes it available to
3051/// clients.
3053
3054} // namespace llvm
3055
3056#endif
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
RelocType Type
Definition: COFFYAML.cpp:391
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine InstCombiner
uint64_t IntrinsicInst * II
This header defines various interfaces for pass management in LLVM.
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
Value * RHS
Class for arbitrary precision integers.
Definition: APInt.h:78
an instruction to allocate memory on the stack
Definition: Instructions.h:61
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:281
The core instruction combiner logic.
Definition: InstCombiner.h:47
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Definition: DerivedTypes.h:40
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:610
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:70
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:290
Multiway switch.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
TargetTransformInfo & getTTI(const Function &F)
virtual bool preferFixedOverScalableIfEqualCost() const =0
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
virtual const DataLayout & getDataLayout() const =0
virtual InstructionCost getBranchMispredictPenalty()=0
virtual bool isProfitableLSRChainElement(Instruction *I)=0
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I)=0
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
virtual bool enableOrderedReductions()=0
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual bool shouldPrefetchAddressSpace(unsigned AS) const =0
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
virtual unsigned getMinVectorRegisterBitWidth() const =0
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const =0
virtual std::optional< unsigned > getVScaleForTuning() const =0
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)=0
virtual bool supportsEfficientVectorElementLoadStore()=0
virtual unsigned getRegUsageForType(Type *Ty)=0
virtual bool hasArmWideBranch(bool Thumb) const =0
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind)=0
virtual bool shouldBuildLookupTables()=0
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType)=0
virtual unsigned getGISelRematGlobalCost() const =0
virtual unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const =0
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const =0
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
virtual bool supportsTailCallFor(const CallBase *CB)=0
virtual std::optional< unsigned > getMaxVScale() const =0
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
virtual unsigned getMaxNumArgs() const =0
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
virtual bool enableWritePrefetching() const =0
virtual bool useColdCCForColdCall(Function &F)=0
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const =0
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual int getInlinerVectorBonusPercent() const =0
virtual unsigned getMaxPrefetchIterationsAhead() const =0
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual unsigned getCacheLineSize() const =0
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const =0
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)=0
virtual bool isProfitableToHoist(Instruction *I)=0
virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getFPOpCost(Type *Ty)=0
virtual unsigned getMinTripCountTailFoldingThreshold() const =0
virtual bool enableMaskedInterleavedAccessVectorization()=0
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
virtual bool isTypeLegal(Type *Ty)=0
virtual BranchProbability getPredictableBranchThreshold()=0
virtual bool enableScalableVectorization() const =0
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual const char * getRegisterClassName(unsigned ClassID) const =0
virtual unsigned getMaxInterleaveFactor(ElementCount VF)=0
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
virtual bool haveFastSqrt(Type *Ty)=0
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)=0
virtual std::optional< unsigned > getCacheSize(CacheLevel Level) const =0
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)=0
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
virtual std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
virtual bool supportsScalableVectors() const =0
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const =0
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
virtual unsigned getNumberOfParts(Type *Tp)=0
virtual bool isLegalICmpImmediate(int64_t Imm)=0
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true)=0
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
virtual bool shouldBuildRelLookupTables()=0
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TargetCostKind CostKind)=0
virtual bool isLoweredToCall(const Function *F)=0
virtual bool isSourceOfDivergence(const Value *V)=0
virtual bool isLegalAddScalableImmediate(int64_t Imm)=0
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const =0
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual bool isFPVectorizationPotentiallyUnsafe()=0
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
virtual bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const =0
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual bool hasBranchDivergence(const Function *F=nullptr)=0
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual unsigned getInliningThresholdMultiplier() const =0
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)=0
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
virtual bool shouldDropLSRSolutionIfLessProfitable() const =0
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
virtual bool prefersVectorizedAddressing()=0
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const =0
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args, const Instruction *CxtI)=0
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, OperandValueInfo OpInfo, const Instruction *I)=0
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
virtual bool isAlwaysUniform(const Value *V)=0
virtual std::optional< unsigned > getMinPageSize() const =0
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const =0
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual unsigned getFlatAddressSpace()=0
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)=0
virtual unsigned getPrefetchDistance() const =0
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I)=0
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const =0
virtual bool isNumRegsMajorCostOfLSR()=0
virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment)=0
virtual bool isSingleThreaded() const =0
virtual bool isLegalAddImmediate(int64_t Imm)=0
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I, int64_t ScalableOffset)=0
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
virtual bool isVScaleKnownToBeAPowerOfTwo() const =0
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
virtual bool enableInterleavedAccessVectorization()=0
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
virtual bool preferEpilogueVectorization() const =0
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast)=0
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const =0
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
bool isLegalToVectorizeLoad(LoadInst *LI) const
std::optional< unsigned > getVScaleForTuning() const
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Return false if an AS0 address cannot possibly alias an AS1 address.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool isLegalToVectorizeStore(StoreInst *SI) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
bool isAlwaysUniform(const Value *V) const
unsigned getAssumedAddrSpace(const Value *V) const
bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize=std::nullopt) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
bool supportsTailCallFor(const CallBase *CB) const
If target supports tail call on CB.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
bool isProfitableLSRChainElement(Instruction *I) const
TypeSize getRegisterBitWidth(RegisterKind K) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
std::optional< unsigned > getMaxVScale() const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are t...
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
unsigned getAtomicMemIntrinsicMaxElementSize() const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting...
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
Query the target what the preferred style of tail folding is.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType=nullptr, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
unsigned getMaxPrefetchIterationsAhead() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
Return true if the target supports strided load.
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and run.
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
OperandValueProperties
Additional properties of an operand's values.
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const PointersChainInfo &Info, Type *AccessTy, TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Estimate the cost of a chain of pointers (typically pointer operands of a chain of loads or stores wi...
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned adjustInliningThreshold(const CallBase *CB) const
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
unsigned getMinVectorRegisterBitWidth() const
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
It can be advantageous to detach complex constants from their uses to make their generation cheaper.
bool hasArmWideBranch(bool Thumb) const
const char * getRegisterClassName(unsigned ClassID) const
bool preferEpilogueVectorization() const
Return true if the loop vectorizer should consider vectorizing an otherwise scalar epilogue loop.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
PopcntSupportKind
Flags indicating the kind of support for population count.
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
unsigned getInliningThresholdMultiplier() const
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
unsigned getNumberOfRegisters(unsigned ClassID) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist an instruction in the then/else blocks to before the if.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
std::optional< unsigned > getMinPageSize() const
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
std::optional< unsigned > getCacheSize(CacheLevel Level) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Returns the cost estimation for alternating opcode pattern that can be lowered to a single instructio...
TargetCostConstants
Underlying constants for 'cost' values in this interface.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
unsigned getMinTripCountTailFoldingThreshold() const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
unsigned getMaxInterleaveFactor(ElementCount VF) const
bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
unsigned getGISelRematGlobalCost() const
MemIndexedMode
The type of load/store indexing.
@ MIM_PostInc
Post-incrementing.
@ MIM_PostDec
Post-decrementing.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool useColdCCForColdCall(Function &F) const
Return true if the input function, which is cold at all call sites, should use coldcc calling conventi...
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
bool supportsTailCalls() const
If the target supports tail calls.
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
unsigned getNumberOfParts(Type *Tp) const
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize=std::nullopt) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=std::nullopt, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr) const
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instruction.
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
Query the target whether it would be preferred to create a predicated vector loop, which can avoid the...
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
bool shouldExpandReduction(const IntrinsicInst *II) const
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
Returns the maximum memset / memcpy size in bytes that still makes it profitable to inline the call.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
OperandValueKind
Additional information about an operand's possible values.
CacheLevel
The possible cache levels.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ None
Definition: CodeGenData.h:101
AtomicOrdering
Atomic ordering for LLVM's memory model.
TargetTransformInfo TTI
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1856
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:92
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
TargetLibraryInfo * TLI
LoopVectorizationLegality * LVL
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelProfiledIterations
Allow peeling based on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Describe known properties for a set of pointers.
unsigned IsKnownStride
True if distance between any two neighbouring pointers is a known value.
unsigned IsUnitStride
These properties are only valid if SameBaseAddress is set.
unsigned IsSameBaseAddress
All the GEPs in a set have same base address.
Flags describing the kind of vector reduction.
bool IsSigned
Whether the operation is a signed int reduction.
bool IsMaxOp
If the op is a min/max kind, true if it's a max operation.
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)