1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
#include "llvm/ADT/APInt.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
#include <optional>
#include <utility>

36namespace llvm {
37
38namespace Intrinsic {
39typedef unsigned ID;
40}
41
42class AllocaInst;
43class AssumptionCache;
44class BlockFrequencyInfo;
45class DominatorTree;
46class BranchInst;
47class CallBase;
48class Function;
49class GlobalValue;
50class InstCombiner;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
53class IntrinsicInst;
54class LoadInst;
55class Loop;
56class LoopInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
60class SCEV;
61class ScalarEvolution;
62class StoreInst;
63class SwitchInst;
64class TargetLibraryInfo;
65class Type;
66class User;
67class Value;
68class VPIntrinsic;
69struct KnownBits;
70
/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};
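
// A minimal sketch of how a target hook might fill this in. getTgtMemIntrinsic
// is the TTI query this struct is returned from (declared further down in this
// header); the intrinsic ID and the implementation class name are hypothetical.
//
//   bool MyTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
//                                      MemIntrinsicInfo &Info) {
//     if (Inst->getIntrinsicID() == Intrinsic::my_widening_load) {
//       Info.PtrVal = Inst->getArgOperand(0); // behaves like a load of PtrVal
//       Info.ReadMem = true;
//       Info.WriteMem = false;
//       Info.IsVolatile = false;
//       return true;
//     }
//     return false;
//   }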
95
/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
  HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L);
  Loop *L = nullptr;
  BasicBlock *ExitBlock = nullptr;
  BranchInst *ExitBranch = nullptr;
  const SCEV *ExitCount = nullptr;
  IntegerType *CountType = nullptr;
  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
                                  // value in every iteration.
  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                  // another hardware loop?
  bool CounterInReg = false;      // Should loop counter be updated in
                                  // the loop via a phi?
  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
                                  // icmp ne zero on the loop counter value and
                                  // produces an i1 to guard the loop entry.
  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
                               DominatorTree &DT, bool ForceNestedLoop = false,
                               bool ForceHardwareLoopPHI = false);
  bool canAnalyze(LoopInfo &LI);
};
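
// A rough sketch of the intended flow (assuming TTI, SE, LI, DT, AC and
// LibInfo objects are available, much like in the HardwareLoops pass):
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (!HWLoopInfo.canAnalyze(*LI))
//     return false;
//   if (!TTI.isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
//     return false;
//   if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
//     return false;
//   // ...emit the hardware-loop intrinsics using HWLoopInfo.ExitCount,
//   // HWLoopInfo.CountType, etc.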
119
class IntrinsicCostAttributes {
  const IntrinsicInst *II = nullptr;
  Type *RetTy = nullptr;
  Intrinsic::ID IID;
  SmallVector<Type *, 4> ParamTys;
  SmallVector<const Value *, 4> Arguments;
  FastMathFlags FMF;
  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
  // arguments and the return value will be computed based on types.
  InstructionCost ScalarizationCost = InstructionCost::getInvalid();

public:
  IntrinsicCostAttributes(
      Intrinsic::ID Id, const CallBase &CI,
      InstructionCost ScalarCost = InstructionCost::getInvalid(),
      bool TypeBasedOnly = false);

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
      FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
      InstructionCost ScalarCost = InstructionCost::getInvalid());

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<const Value *> Args);

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
      ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
      const IntrinsicInst *I = nullptr,
      InstructionCost ScalarCost = InstructionCost::getInvalid());

  Intrinsic::ID getID() const { return IID; }
  const IntrinsicInst *getInst() const { return II; }
  Type *getReturnType() const { return RetTy; }
  FastMathFlags getFlags() const { return FMF; }
  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }

  bool isTypeBasedOnly() const {
    return Arguments.empty();
  }

  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
};
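
// A minimal sketch of costing an intrinsic call before materializing it
// (assumes a TargetTransformInfo object `TTI` and a vector type `VecTy` are in
// scope):
//
//   IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, VecTy,
//                                 {VecTy, VecTy, VecTy});
//   InstructionCost Cost =
//       TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_RecipThroughput);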
165
enum class TailFoldingStyle {
  /// Don't use tail folding
  None,
  /// Use predicate only to mask operations on data in the loop.
  /// When the VL is not known to be a power of 2, this method requires a
  /// runtime overflow check for the i + VL increment in the loop, because it
  /// compares the scalar induction variable against the trip count rounded up
  /// by VL, which may overflow. When the VL is a power of 2, both the
  /// increment and the rounded-up trip count overflow to 0, so no runtime
  /// check is needed: the loop is exited when the loop induction variable
  /// equals the rounded-up trip count, which are both 0.
  Data,
  /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
  /// calculate the mask and instead implements this with a
  /// splat/stepvector/cmp.
  /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
  /// active.lane.mask intrinsic when it is not natively supported?
  DataWithoutLaneMask,
  /// Use predicate to control both data and control flow.
  /// This method always requires a runtime overflow check for the i + VL
  /// increment inside the loop, because it uses the result directly in the
  /// active.lane.mask to calculate the mask for the next iteration. If the
  /// increment overflows, the mask is no longer correct.
  DataAndControlFlow,
  /// Use predicate to control both data and control flow, but modify
  /// the trip count so that a runtime overflow check can be avoided
  /// and such that the scalar epilogue loop can always be removed.
  DataAndControlFlowWithoutRuntimeCheck,
};
195
struct TailFoldingInfo {
  TargetLibraryInfo *TLI;
  LoopVectorizationLegality *LVL;
  InterleavedAccessInfo *IAI;
  TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
                  InterleavedAccessInfo *IAI)
      : TLI(TLI), LVL(LVL), IAI(IAI) {}
};
204
class TargetTransformInfo;
typedef TargetTransformInfo TTI;

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
212 /// Construct a TTI object using a type implementing the \c Concept
213 /// API below.
214 ///
215 /// This is used by targets to construct a TTI wrapping their target-specific
216 /// implementation that encodes appropriate costs for their target.
217 template <typename T> TargetTransformInfo(T Impl);
218
219 /// Construct a baseline TTI object using a minimal implementation of
220 /// the \c Concept API below.
221 ///
222 /// The TTI implementation will reflect the information in the DataLayout
223 /// provided if non-null.
224 explicit TargetTransformInfo(const DataLayout &DL);
225
  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }
245
246 /// \name Generic Target Information
247 /// @{
248
249 /// The kind of cost model.
250 ///
251 /// There are several different cost models that can be customized by the
252 /// target. The normalization of each cost model may be target specific.
253 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
  /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
257 TCK_Latency, ///< The latency of instruction.
258 TCK_CodeSize, ///< Instruction code size.
259 TCK_SizeAndLatency ///< The weighted sum of size and latency.
260 };
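
  // The same IR can be costed under different kinds; a speed-oriented
  // transform and a size-oriented one may reach different conclusions.
  // A minimal sketch (assumes a TargetTransformInfo object `TTI` and an
  // Instruction `I` are in scope):
  //
  //   InstructionCost Speed =
  //       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
  //   InstructionCost Size =
  //       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);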
261
262 /// Underlying constants for 'cost' values in this interface.
263 ///
264 /// Many APIs in this interface return a cost. This enum defines the
265 /// fundamental values that should be used to interpret (and produce) those
266 /// costs. The costs are returned as an int rather than a member of this
267 /// enumeration because it is expected that the cost of one IR instruction
268 /// may have a multiplicative factor to it or otherwise won't fit directly
269 /// into the enum. Moreover, it is common to sum or average costs which works
270 /// better as simple integral values. Thus this enum only provides constants.
271 /// Also note that the returned costs are signed integers to make it natural
272 /// to add, subtract, and test with zero (a common boundary condition). It is
273 /// not expected that 2^32 is a realistic cost to be modeling at any point.
274 ///
275 /// Note that these costs should usually reflect the intersection of code-size
276 /// cost and execution cost. A free instruction is typically one that folds
277 /// into another instruction. For example, reg-to-reg moves can often be
278 /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
282 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
283 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
284 };
285
286 /// Estimate the cost of a GEP operation when lowered.
287 ///
288 /// \p PointeeType is the source element type of the GEP.
289 /// \p Ptr is the base pointer operand.
290 /// \p Operands is the list of indices following the base pointer.
291 ///
292 /// \p AccessType is a hint as to what type of memory might be accessed by
293 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
294 /// folded into the addressing mode of a load/store. If AccessType is null,
295 /// then the resulting target type based off of PointeeType will be used as an
  /// approximation.
  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
             TargetCostKind CostKind = TCK_SizeAndLatency) const;

  /// Describe known properties for a set of pointers.
  struct PointersChainInfo {
    /// All the GEPs in a set have same base address.
    unsigned IsSameBaseAddress : 1;
    /// These properties are only valid if SameBaseAddress is set.
    /// True if all pointers are separated by a unit stride.
    unsigned IsUnitStride : 1;
    /// True if the distance between any two neighbouring pointers is a known
    /// value.
    unsigned IsKnownStride : 1;
    unsigned Reserved : 29;

    bool isSameBase() const { return IsSameBaseAddress; }
    bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
    bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }

    static PointersChainInfo getUnitStride() {
      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
              /*IsKnownStride=*/1, 0};
    }
    static PointersChainInfo getKnownStride() {
      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
              /*IsKnownStride=*/1, 0};
    }
    static PointersChainInfo getUnknownStride() {
      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
              /*IsKnownStride=*/0, 0};
    }
  };
  static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
331
  /// Estimate the cost of a chain of pointers (typically pointer operands of a
  /// chain of loads or stores within the same block) when lowered.
  /// \p AccessTy is the type of the loads/stores that will ultimately use the
  /// \p Ptrs.
  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const PointersChainInfo &Info, Type *AccessTy,
                       TargetCostKind CostKind = TCK_RecipThroughput) const;

342
343 /// \returns A value by which our inlining threshold should be multiplied.
344 /// This is primarily used to bump up the inlining threshold wholesale on
345 /// targets where calls are unusually expensive.
346 ///
347 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
348 /// individual classes of instructions would be better.
349 unsigned getInliningThresholdMultiplier() const;
350
351 /// \returns A value to be added to the inlining threshold.
352 unsigned adjustInliningThreshold(const CallBase *CB) const;
353
354 /// \returns The cost of having an Alloca in the caller if not inlined, to be
355 /// added to the threshold
356 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
357
358 /// \returns Vector bonus in percent.
359 ///
360 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
361 /// and apply this bonus based on the percentage of vector instructions. A
362 /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
  /// arbitrary and evolved over time by accident as much as because they are
  /// principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  InstructionCost getMemcpyCost(const Instruction *I) const;

  /// Returns the maximum memset / memcpy size in bytes that still makes it
  /// profitable to inline the call.
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const;

378 /// \return The estimated number of case clusters when lowering \p 'SI'.
379 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
380 /// table.
382 unsigned &JTSize,
384 BlockFrequencyInfo *BFI) const;
385
386 /// Estimate the cost of a given IR user when lowered.
387 ///
388 /// This can estimate the cost of either a ConstantExpr or Instruction when
389 /// lowered.
390 ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must be equal
  /// to the number of current operands the IR user has, and their order on the
  /// list must match the order of the current operands.
396 ///
397 /// The returned cost is defined in terms of \c TargetCostConstants, see its
398 /// comments for a detailed explanation of the cost values.
402
  /// This is a helper function which calls the three-argument
  /// getInstructionCost with \p Operands which are the current operands U has.
  InstructionCost getInstructionCost(const User *U,
                                     TargetCostKind CostKind) const {
    SmallVector<const Value *, 4> Operands(U->operand_values());
    return getInstructionCost(U, Operands, CostKind);
  }
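
  // Example (illustrative sketch): summing the size-and-latency cost of a
  // basic block `BB`, as an unrolling or inlining heuristic might do:
  //
  //   InstructionCost Total = 0;
  //   for (const Instruction &I : BB)
  //     Total +=
  //         TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);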
410
  /// If a branch or a select condition is skewed in one direction by more than
  /// this factor, it is very likely to be predicted correctly.
  BranchProbability getPredictableBranchThreshold() const;

415 /// Return true if branch divergence exists.
416 ///
417 /// Branch divergence has a significantly negative impact on GPU performance
418 /// when threads in the same wavefront take different paths due to conditional
419 /// branches.
420 ///
421 /// If \p F is passed, provides a context function. If \p F is known to only
422 /// execute in a single threaded environment, the target may choose to skip
423 /// uniformity analysis and assume all values are uniform.
424 bool hasBranchDivergence(const Function *F = nullptr) const;
425
426 /// Returns whether V is a source of divergence.
427 ///
428 /// This function provides the target-dependent information for
429 /// the target-independent UniformityAnalysis.
430 bool isSourceOfDivergence(const Value *V) const;
431
432 // Returns true for the target specific
433 // set of operations which produce uniform result
434 // even taking non-uniform arguments
435 bool isAlwaysUniform(const Value *V) const;
436
437 /// Query the target whether the specified address space cast from FromAS to
438 /// ToAS is valid.
439 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
440
441 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
442 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
443
444 /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
454 /// be converted with the addrspacecast instruction. If a pointer is converted
455 /// to this address space, optimizations should attempt to replace the access
456 /// with the source address space.
457 ///
458 /// \returns ~0u if the target does not have such a flat address space to
459 /// optimize away.
460 unsigned getFlatAddressSpace() const;
461
462 /// Return any intrinsic address operand indexes which may be rewritten if
463 /// they use a flat address space pointer.
464 ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
468
469 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
470
471 /// Return true if globals in this address space can have initializers other
472 /// than `undef`.
474
475 unsigned getAssumedAddrSpace(const Value *V) const;
476
477 bool isSingleThreaded() const;
478
479 std::pair<const Value *, unsigned>
480 getPredicatedAddrSpace(const Value *V) const;
481
482 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
483 /// NewV, which has a different address space. This should happen for every
484 /// operand index that collectFlatAddressOperands returned for the intrinsic.
485 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
  /// new value (which may be the original \p II with modified operands).
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;
489
490 /// Test whether calls to a function lower to actual program function
491 /// calls.
492 ///
493 /// The idea is to test whether the program is likely to require a 'call'
494 /// instruction or equivalent in order to call the given function.
495 ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
498 /// Alternatively, we could split the cost interface into distinct code-size
499 /// and execution-speed costs. This would allow modelling the core of this
500 /// query more accurately as a call is a single small instruction, but
501 /// incurs significant execution cost.
502 bool isLoweredToCall(const Function *F) const;
503
504 struct LSRCost {
505 /// TODO: Some of these could be merged. Also, a lexical ordering
506 /// isn't always optimal.
507 unsigned Insns;
508 unsigned NumRegs;
509 unsigned AddRecCost;
510 unsigned NumIVMuls;
511 unsigned NumBaseAdds;
512 unsigned ImmCost;
513 unsigned SetupCost;
514 unsigned ScaleCost;
515 };
516
517 /// Parameters that control the generic loop unrolling transformation.
519 /// The cost threshold for the unrolled loop. Should be relative to the
520 /// getInstructionCost values returned by this API, and the expectation is
521 /// that the unrolled loop's instructions when run through that interface
522 /// should not exceed this cost. However, this is only an estimate. Also,
523 /// specific loops may be unrolled even with a cost above this threshold if
524 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
525 /// restriction.
526 unsigned Threshold;
527 /// If complete unrolling will reduce the cost of the loop, we will boost
528 /// the Threshold by a certain percent to allow more aggressive complete
529 /// unrolling. This value provides the maximum boost percentage that we
530 /// can apply to Threshold (The value should be no less than 100).
531 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
532 /// MaxPercentThresholdBoost / 100)
533 /// E.g. if complete unrolling reduces the loop execution time by 50%
534 /// then we boost the threshold by the factor of 2x. If unrolling is not
535 /// expected to reduce the running time, then we do not increase the
536 /// threshold.
538 /// The cost threshold for the unrolled loop when optimizing for size (set
539 /// to UINT_MAX to disable).
541 /// The cost threshold for the unrolled loop, like Threshold, but used
542 /// for partial/runtime unrolling (set to UINT_MAX to disable).
544 /// The cost threshold for the unrolled loop when optimizing for size, like
545 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
546 /// UINT_MAX to disable).
548 /// A forced unrolling factor (the number of concatenated bodies of the
549 /// original loop in the unrolled loop body). When set to 0, the unrolling
550 /// transformation will select an unrolling factor based on the current cost
551 /// threshold and other factors.
552 unsigned Count;
553 /// Default unroll count for loops with run-time trip count.
555 // Set the maximum unrolling factor. The unrolling factor may be selected
556 // using the appropriate cost threshold, but may not exceed this number
557 // (set to UINT_MAX to disable). This does not apply in cases where the
558 // loop is being fully unrolled.
559 unsigned MaxCount;
560 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
561 /// applies even if full unrolling is selected. This allows a target to fall
562 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
564 // Represents number of instructions optimized when "back edge"
565 // becomes "fall through" in unrolled loop.
566 // For now we count a conditional branch on a backedge and a comparison
567 // feeding it.
568 unsigned BEInsns;
569 /// Allow partial unrolling (unrolling of loops to expand the size of the
570 /// loop body, not only to eliminate small constant-trip-count loops).
572 /// Allow runtime unrolling (unrolling of loops to expand the size of the
573 /// loop body even when the number of loop iterations is not known at
574 /// compile time).
576 /// Allow generation of a loop remainder (extra iterations after unroll).
578 /// Allow emitting expensive instructions (such as divisions) when computing
579 /// the trip count of a loop for runtime unrolling.
581 /// Apply loop unroll on any kind of loop
582 /// (mainly to loops that fail runtime unrolling).
583 bool Force;
584 /// Allow using trip count upper bound to unroll loops.
586 /// Allow unrolling of all the iterations of the runtime loop remainder.
588 /// Allow unroll and jam. Used to enable unroll and jam for the target.
590 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
591 /// value above is used during unroll and jam for the outer loop size.
592 /// This value is used in the same manner to limit the size of the inner
593 /// loop.
595 /// Don't allow loop unrolling to simulate more than this number of
596 /// iterations when checking full unroll profitability
598 /// Don't disable runtime unroll for the loops which were vectorized.
600 };
601
602 /// Get target-customized preferences for the generic loop unrolling
603 /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const;
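
  // Example (hypothetical target override in a *TTIImpl subclass; the class
  // name MyTTIImpl and the chosen numbers are illustrative only):
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           TTI::UnrollingPreferences &UP,
  //                                           OptimizationRemarkEmitter *ORE) {
  //     UP.Partial = true;                          // allow partial unrolling
  //     UP.Runtime = false;                         // but not runtime unrolling
  //     UP.Threshold = std::min(UP.Threshold, 50u); // keep unrolled bodies small
  //   }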
608
  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;
614
  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;

619 /// Query the target what the preferred style of tail folding is.
620 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
621 /// may (or will never) overflow for the suggested VF/UF in the given loop.
622 /// Targets can use this information to select a more optimal tail folding
623 /// style. The value conservatively defaults to true, such that no assumptions
  /// are made on overflow.
  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
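
  // Example (hypothetical target override): prefer the cheaper Data style when
  // the IV update is known not to overflow, otherwise predicate control flow
  // as well.
  //
  //   TailFoldingStyle
  //   MyTTIImpl::getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
  //     return IVUpdateMayOverflow ? TailFoldingStyle::DataAndControlFlow
  //                                : TailFoldingStyle::Data;
  //   }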
627
  // Parameters that control the loop peeling transformation
  struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// pass selects a peeling factor based on profile information and other
    /// factors.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true the peeling cost model can decide to peel only
    /// some iterations and in this case it will set this to false.
    bool PeelProfiledIterations;
  };
644
645 /// Get target-customized preferences for the generic loop peeling
646 /// transformation. The caller will initialize \p PP with the current
  /// target-independent defaults with information from \p L and \p SE.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) const;
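
  // Example (hypothetical target override): peel one iteration of small
  // single-block loops to expose a loop-invariant condition.
  //
  //   void MyTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
  //                                         TTI::PeelingPreferences &PP) {
  //     if (L->getNumBlocks() == 1)
  //       PP.PeelCount = 1;
  //   }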
650
651 /// Targets can implement their own combinations for target-specific
652 /// intrinsics. This function will be called from the InstCombine pass every
653 /// time a target-specific intrinsic is encountered.
654 ///
  /// \returns std::nullopt to not do anything target specific, or a value that
  /// will be returned from the InstCombiner. It is also possible to stop
  /// further processing of the intrinsic by returning nullptr.
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
      KnownBits &Known, bool &KnownBitsComputed) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
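
  // Example (hypothetical target override; the intrinsic ID and the use of
  // PatternMatch's m_Zero are illustrative only): fold a blend whose mask is
  // all-zero back to its first operand.
  //
  //   std::optional<Instruction *>
  //   MyTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) {
  //     if (II.getIntrinsicID() == Intrinsic::my_blend &&
  //         match(II.getArgOperand(2), m_Zero()))
  //       return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  //     return std::nullopt; // nothing target specific to do
  //   }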
672 /// @}
673
674 /// \name Scalar Target Information
675 /// @{
676
677 /// Flags indicating the kind of support for population count.
678 ///
679 /// Compared to the SW implementation, HW support is supposed to
680 /// significantly boost the performance when the population is dense, and it
681 /// may or may not degrade performance if the population is sparse. A HW
682 /// support is considered as "Fast" if it can outperform, or is on a par
683 /// with, SW implementation when the population is sparse; otherwise, it is
  /// considered as "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
688 /// is the target has add instructions which can add a register with the
689 /// immediate without having to materialize the immediate into a register.
690 bool isLegalAddImmediate(int64_t Imm) const;
691
  /// Return true if the specified immediate is a legal icmp immediate,
693 /// that is the target has icmp instructions which can compare a register
694 /// against the immediate without having to materialize the immediate into a
695 /// register.
696 bool isLegalICmpImmediate(int64_t Imm) const;
697
698 /// Return true if the addressing mode represented by AM is legal for
699 /// this target, for a load/store of the specified type.
700 /// The type may be VoidTy, in which case only return true if the addressing
701 /// mode is legal for a load/store of any legal type.
702 /// If target returns true in LSRWithInstrQueries(), I may be valid.
703 /// TODO: Handle pre/postinc as well.
704 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
705 bool HasBaseReg, int64_t Scale,
706 unsigned AddrSpace = 0,
707 Instruction *I = nullptr) const;
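
  // Example (illustrative sketch; Int32Ty is an i32 Type* in scope): before
  // committing to the addressing formula [BaseReg + 4*IndexReg + 16] for an
  // i32 load, a transform can ask whether the target supports it directly.
  //
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);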
708
  /// Return true if LSR cost of C1 is lower than C2.
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const;
712
713 /// Return true if LSR major cost is number of registers. Targets which
714 /// implement their own isLSRCostLess and unset number of registers as major
715 /// cost should return false, otherwise return true.
716 bool isNumRegsMajorCostOfLSR() const;
717
  /// \returns true if LSR should not optimize a chain that includes \p I.
  bool isProfitableLSRChainElement(Instruction *I) const;

721 /// Return true if the target can fuse a compare and branch.
722 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
723 /// calculation for the instructions in a loop.
724 bool canMacroFuseCmp() const;
725
726 /// Return true if the target can save a compare for loop count, for example
727 /// hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  enum AddressingModeKind {
    AMK_PreIndexed,
    AMK_PostIndexed,
    AMK_None,
  };

  /// Return the preferred addressing mode LSR should make efforts to generate.
  AddressingModeKind getPreferredAddressingMode(const Loop *L,
                                                ScalarEvolution *SE) const;
741
742 /// Return true if the target supports masked store.
743 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
744 /// Return true if the target supports masked load.
745 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
746
747 /// Return true if the target supports nontemporal store.
748 bool isLegalNTStore(Type *DataType, Align Alignment) const;
749 /// Return true if the target supports nontemporal load.
750 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
751
752 /// \Returns true if the target supports broadcasting a load to a vector of
753 /// type <NumElements x ElementTy>.
754 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
755
756 /// Return true if the target supports masked scatter.
757 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
758 /// Return true if the target supports masked gather.
759 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
760 /// Return true if the target forces scalarizing of llvm.masked.gather
761 /// intrinsics.
762 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
763 /// Return true if the target forces scalarizing of llvm.masked.scatter
764 /// intrinsics.
765 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
766
767 /// Return true if the target supports masked compress store.
768 bool isLegalMaskedCompressStore(Type *DataType) const;
769 /// Return true if the target supports masked expand load.
770 bool isLegalMaskedExpandLoad(Type *DataType) const;
771
772 /// Return true if this is an alternating opcode pattern that can be lowered
773 /// to a single instruction on the target. In X86 this is for the addsub
  /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
  /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
  /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
778 /// \p VecTy is the vector type of the instruction to be generated.
779 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
780 const SmallBitVector &OpcodeMask) const;
781
782 /// Return true if we should be enabling ordered reductions for the target.
783 bool enableOrderedReductions() const;
784
785 /// Return true if the target has a unified operation to calculate division
786 /// and remainder. If so, the additional implicit multiplication and
787 /// subtraction required to calculate a remainder from division are free. This
788 /// can enable more aggressive transformations for division and remainder than
789 /// would typically be allowed using throughput or size cost models.
790 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
791
792 /// Return true if the given instruction (assumed to be a memory access
793 /// instruction) has a volatile variant. If that's the case then we can avoid
794 /// addrspacecast to generic AS for volatile loads/stores. Default
795 /// implementation returns false, which prevents address space inference for
796 /// volatile loads/stores.
797 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
798
799 /// Return true if target doesn't mind addresses in vectors.
800 bool prefersVectorizedAddressing() const;
801
802 /// Return the cost of the scaling factor used in the addressing
803 /// mode represented by AM for this target, for a load/store
804 /// of the specified type.
805 /// If the AM is supported, the return value must be >= 0.
806 /// If the AM is not supported, it returns a negative value.
807 /// TODO: Handle pre/postinc as well.
809 int64_t BaseOffset, bool HasBaseReg,
810 int64_t Scale,
811 unsigned AddrSpace = 0) const;
812
813 /// Return true if the loop strength reduce pass should make
814 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
815 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
816 /// immediate offset and no index register.
817 bool LSRWithInstrQueries() const;
818
819 /// Return true if it's free to truncate a value of type Ty1 to type
820 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
821 /// by referencing its sub-register AX.
822 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
823
824 /// Return true if it is profitable to hoist instruction in the
825 /// then/else to before if.
826 bool isProfitableToHoist(Instruction *I) const;
827
828 bool useAA() const;
829
830 /// Return true if this type is legal.
831 bool isTypeLegal(Type *Ty) const;
832
833 /// Returns the estimated number of registers required to represent \p Ty.
834 unsigned getRegUsageForType(Type *Ty) const;
835
836 /// Return true if switches should be turned into lookup tables for the
837 /// target.
838 bool shouldBuildLookupTables() const;
839
840 /// Return true if switches should be turned into lookup tables
841 /// containing this constant value for the target.
843
844 /// Return true if lookup tables should be turned into relative lookup tables.
845 bool shouldBuildRelLookupTables() const;
846
847 /// Return true if the input function which is cold at all call sites,
848 /// should use coldcc calling convention.
849 bool useColdCCForColdCall(Function &F) const;
850
  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TargetCostKind CostKind) const;

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each of
  /// its arguments are passed via \p Tys.
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) const;

  /// If target has efficient vector element load/store instructions, it can
  /// return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

872 /// If the target supports tail calls.
873 bool supportsTailCalls() const;
874
875 /// If target supports tail call on \p CB
876 bool supportsTailCallFor(const CallBase *CB) const;
877
878 /// Don't restrict interleaved unrolling to small loops.
879 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
880
  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;

    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;
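
  // Example (hypothetical target override): expand memcmp with up to four
  // loads of 8/4/2/1 bytes, and allow overlapping loads for equality-only
  // comparisons.
  //
  //   TTI::MemCmpExpansionOptions
  //   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  //     TTI::MemCmpExpansionOptions Options;
  //     Options.MaxNumLoads = OptSize ? 2 : 4;
  //     Options.LoadSizes = {8, 4, 2, 1};
  //     Options.AllowOverlappingLoads = IsZeroCmp;
  //     return Options;
  //   }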
910
  /// Should the Select Optimization pass be enabled and run.
  bool enableSelectOptimize() const;
913
  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      Align Alignment = Align(1),
                                      unsigned *Fast = nullptr) const;
936
937 /// Return hardware support for population count.
938 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
939
940 /// Return true if the hardware has a fast square-root instruction.
941 bool haveFastSqrt(Type *Ty) const;
942
943 /// Return true if the cost of the instruction is too high to speculatively
944 /// execute and should be kept behind a branch.
945 /// This normally just wraps around a getInstructionCost() call, but some
946 /// targets might report a low TCK_SizeAndLatency value that is incompatible
947 /// with the fixed TCC_Expensive value.
  /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;

951 /// Return true if it is faster to check if a floating-point value is NaN
952 /// (or not-NaN) versus a comparison against a constant FP zero value.
953 /// Targets should override this if materializing a 0.0 for comparison is
954 /// generally as cheap as checking for ordered/unordered.
955 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
956
  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  InstructionCost getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing for the given integer
  /// immediate of the specified type.
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) const;

  /// Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const;
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) const;

  /// Return the expected cost for the given integer when optimising
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost of a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const;
986 /// @}
987
988 /// \name Vector Target Information
989 /// @{
990
991 /// The various kinds of shuffle patterns for vector queries.
993 SK_Broadcast, ///< Broadcast element 0 to all other elements.
994 SK_Reverse, ///< Reverse the order of the vector.
995 SK_Select, ///< Selects elements from the corresponding lane of
996 ///< either source operand. This is equivalent to a
997 ///< vector select with a constant condition operand.
998 SK_Transpose, ///< Transpose two vectors.
999 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1000 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1001 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1002 ///< with any shuffle mask.
1003 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1004 ///< shuffle mask.
1005 SK_Splice ///< Concatenates elements from the first input vector
1006 ///< with elements of the second input vector. Returning
1007 ///< a vector of the same type as the input vectors.
1008 ///< Index indicates start offset in first input vector.
1010
1011 /// Additional information about an operand's possible values.
1013 OK_AnyValue, // Operand can have any value.
1014 OK_UniformValue, // Operand is uniform (splat of a value).
1015 OK_UniformConstantValue, // Operand is uniform constant.
1016 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1018
1019 /// Additional properties of an operand's values.
1024 };
1025
1026 // Describe the values an operand can take. We're in the process
1027 // of migrating uses of OperandValueKind and OperandValueProperties
1028 // to use this class, and then will change the internal representation.
1032
1033 bool isConstant() const {
1035 }
1036 bool isUniform() const {
1038 }
1039 bool isPowerOf2() const {
1040 return Properties == OP_PowerOf2;
1041 }
1042 bool isNegatedPowerOf2() const {
1044 }
1045
1047 return {Kind, OP_None};
1048 }
1049 };
1050
1051 /// \return the number of registers in the target-provided register class.
1052 unsigned getNumberOfRegisters(unsigned ClassID) const;
1053
1054 /// \return the target-provided register class ID for the provided type,
1055 /// accounting for type promotion and other type-legalization techniques that
1056 /// the target might apply. However, it specifically does not account for the
1057 /// scalarization or splitting of vector types. Should a vector type require
1058 /// scalarization or splitting into multiple underlying vector registers, that
1059 /// type should be mapped to a register class containing no registers.
1060 /// Specifically, this is designed to provide a simple, high-level view of the
1061 /// register allocation later performed by the backend. These register classes
1062 /// don't necessarily map onto the register classes used by the backend.
1063 /// FIXME: It's not currently possible to determine how many registers
1064 /// are used by the provided type.
1065 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1066
1067 /// \return the target-provided register class name
1068 const char *getRegisterClassName(unsigned ClassID) const;
1069
1071
1072 /// \return The width of the largest scalar or vector register type.
1074
1075 /// \return The width of the smallest vector register type.
1076 unsigned getMinVectorRegisterBitWidth() const;
1077
1078 /// \return The maximum value of vscale if the target specifies an
1079 /// architectural maximum vector length, and std::nullopt otherwise.
1080 std::optional<unsigned> getMaxVScale() const;
1081
1082 /// \return the value of vscale to tune the cost model for.
1083 std::optional<unsigned> getVScaleForTuning() const;
1084
1085 /// \return true if vscale is known to be a power of 2
1086 bool isVScaleKnownToBeAPowerOfTwo() const;
1087
1088 /// \return True if the vectorization factor should be chosen to
1089 /// make the vector of the smallest element type match the size of a
1090 /// vector register. For wider element types, this could result in
1091 /// creating vectors that span multiple vector registers.
1092 /// If false, the vectorization factor will be chosen based on the
1093 /// size of the widest element type.
1094 /// \p K Register Kind for vectorization.
1096
1097 /// \return The minimum vectorization factor for types of given element
1098 /// bit width, or 0 if there is no minimum VF. The returned value only
1099 /// applies when shouldMaximizeVectorBandwidth returns true.
1100 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1101 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1102
1103 /// \return The maximum vectorization factor for types of given element
1104 /// bit width and opcode, or 0 if there is no maximum VF.
1105 /// Currently only used by the SLP vectorizer.
1106 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1107
1108 /// \return The minimum vectorization factor for the store instruction. Given
1109 /// the initial estimation of the minimum vector factor and store value type,
1110 /// it tries to find possible lowest VF, which still might be profitable for
1111 /// the vectorization.
1112 /// \param VF Initial estimation of the minimum vector factor.
1113 /// \param ScalarMemTy Scalar memory type of the store operation.
1114 /// \param ScalarValTy Scalar type of the stored value.
1115 /// Currently only used by the SLP vectorizer.
1116 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1117 Type *ScalarValTy) const;
1118
1119 /// \return True if it should be considered for address type promotion.
1120 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1121 /// profitable without finding other extensions fed by the same input.
1123 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1124
1125 /// \return The size of a cache line in bytes.
1126 unsigned getCacheLineSize() const;
1127
1128 /// The possible cache levels
1129 enum class CacheLevel {
1130 L1D, // The L1 data cache
1131 L2D, // The L2 data cache
1132
1133 // We currently do not model L3 caches, as their sizes differ widely between
1134 // microarchitectures. Also, we currently do not have a use for L3 cache
1135 // size modeling yet.
1136 };
1137
1138 /// \return The size of the cache level in bytes, if available.
1139 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1140
1141 /// \return The associativity of the cache level, if available.
1142 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1143
1144 /// \return How much before a load we should place the prefetch
1145 /// instruction. This is currently measured in number of
1146 /// instructions.
1147 unsigned getPrefetchDistance() const;
1148
1149 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1150 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1151 /// and the arguments provided are meant to serve as a basis for deciding this
1152 /// for a particular loop.
1153 ///
1154 /// \param NumMemAccesses Number of memory accesses in the loop.
1155 /// \param NumStridedMemAccesses Number of the memory accesses that
1156 /// ScalarEvolution could find a known stride
1157 /// for.
1158 /// \param NumPrefetches Number of software prefetches that will be
1159 /// emitted as determined by the addresses
1160 /// involved and the cache line size.
1161 /// \param HasCall True if the loop contains a call.
1162 ///
1163 /// \return This is the minimum stride in bytes where it makes sense to start
1164 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1165 /// stride.
1166 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1167 unsigned NumStridedMemAccesses,
1168 unsigned NumPrefetches, bool HasCall) const;
1169
1170 /// \return The maximum number of iterations to prefetch ahead. If
1171 /// the required number of iterations is more than this number, no
1172 /// prefetching is performed.
1173 unsigned getMaxPrefetchIterationsAhead() const;
1174
1175 /// \return True if prefetching should also be done for writes.
1176 bool enableWritePrefetching() const;
1177
1178 /// \return if target want to issue a prefetch in address space \p AS.
1179 bool shouldPrefetchAddressSpace(unsigned AS) const;
1180
1181 /// \return The maximum interleave factor that any transform should try to
1182 /// perform for this target. This number depends on the level of parallelism
1183 /// and the number of execution units in the CPU.
1184 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1185
1186 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1187 static OperandValueInfo getOperandInfo(const Value *V);
1188
1189 /// This is an approximation of reciprocal throughput of a math/logic op.
1190 /// A higher cost indicates less expected throughput.
1191 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1192 /// clock cycles per instruction when the instructions are not part of a
1193 /// limiting dependency chain."
1194 /// Therefore, costs should be scaled to account for multiple execution units
1195 /// on the target that can process this type of instruction. For example, if
1196 /// there are 5 scalar integer units and 2 vector integer units that can
1197 /// calculate an 'add' in a single cycle, this model should indicate that the
1198 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1199 /// add instruction.
1200 /// \p Args is an optional argument which holds the instruction operands
1201 /// values so the TTI can analyze those values searching for special
1202 /// cases or optimizations based on those values.
1203 /// \p CxtI is the optional original context instruction, if one exists, to
1204 /// provide even more information.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) const;
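
  // Example (illustrative sketch; LHS, RHS and VecTy are assumed to be in
  // scope): costing a vector multiply whose operand properties are derived
  // from the values themselves, e.g. a uniform power-of-two constant that many
  // targets lower as a shift.
  //
  //   TTI::OperandValueInfo LHSInfo = TargetTransformInfo::getOperandInfo(LHS);
  //   TTI::OperandValueInfo RHSInfo = TargetTransformInfo::getOperandInfo(RHS);
  //   InstructionCost Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Mul, VecTy, TTI::TCK_RecipThroughput, LHSInfo, RHSInfo);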
1212
1213 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1214 /// The exact mask may be passed as Mask, or else the array will be empty.
1215 /// The index and subtype parameters are used by the subvector insertion and
1216 /// extraction shuffle kinds to show the insert/extract point and the type of
1217 /// the subvector being inserted/extracted. The operands of the shuffle can be
1218 /// passed through \p Args, which helps improve the cost estimation in some
1219 /// cases, like in broadcast loads.
1220 /// NOTE: For subvector extractions Tp represents the source type.
1221 InstructionCost
1223 ArrayRef<int> Mask = std::nullopt,
1225 int Index = 0, VectorType *SubTp = nullptr,
1226 ArrayRef<const Value *> Args = std::nullopt) const;
1227
1228 /// Represents a hint about the context in which a cast is used.
1229 ///
1230 /// For zext/sext, the context of the cast is the operand, which must be a
  /// load of some kind. For trunc, the context of the cast is the single
  /// user of the instruction, which must be a store of some kind.
  ///
  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
  /// type of cast it's dealing with, as not every cast is equal. For instance,
  /// the zext of a load may be free, but the zext of an interleaving load can
  /// be (very) expensive!
1238 ///
1239 /// See \c getCastContextHint to compute a CastContextHint from a cast
1240 /// Instruction*. Callers can use it if they don't need to override the
1241 /// context and just want it to be calculated from the instruction.
1242 ///
1243 /// FIXME: This handles the types of load/store that the vectorizer can
1244 /// produce, which are the cases where the context instruction is most
1245 /// likely to be incorrect. There are other situations where that can happen
1246 /// too, which might be handled here but in the long run a more general
1247 /// solution of costing multiple instructions at the same times may be better.
1248 enum class CastContextHint : uint8_t {
1249 None, ///< The cast is not used with a load/store of any kind.
1250 Normal, ///< The cast is used with a normal load/store.
1251 Masked, ///< The cast is used with a masked load/store.
1252 GatherScatter, ///< The cast is used with a gather/scatter.
1253 Interleave, ///< The cast is used with an interleaved load/store.
1254 Reversed, ///< The cast is used with a reversed load/store.
1255 };
1256
1257 /// Calculates a CastContextHint from \p I.
1258 /// This should be used by callers of getCastInstrCost if they wish to
1259 /// determine the context from some instruction.
1260 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1261 /// or if it's another type of cast.
1263
1264 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1265 /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH,
                   TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                   const Instruction *I = nullptr) const;
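
  // Example (illustrative sketch; Ext is an existing zext instruction and
  // WideVecTy/NarrowVecTy its destination/source types): let the context hint
  // be derived from the instruction itself.
  //
  //   TTI::CastContextHint CCH = TargetTransformInfo::getCastContextHint(Ext);
  //   InstructionCost Cost =
  //       TTI.getCastInstrCost(Instruction::ZExt, WideVecTy, NarrowVecTy, CCH,
  //                            TTI::TCK_RecipThroughput, Ext);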
1272
1273 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1274 /// Index = -1 to indicate that there is no information about the index value.
1275 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1276 VectorType *VecTy,
1277 unsigned Index) const;
1278
1279 /// \return The expected cost of control-flow related instructions such as
1280 /// Phi, Ret, Br, Switch.
1282 getCFInstrCost(unsigned Opcode,
1284 const Instruction *I = nullptr) const;
1285
1286 /// \returns The expected cost of compare and select instructions. If there
1287 /// is an existing instruction that holds Opcode, it may be passed in the
1288 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1289 /// is using a compare with the specified predicate as condition. When vector
1290 /// types are passed, \p VecPred must be used for all lanes.
1292 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1293 CmpInst::Predicate VecPred,
1295 const Instruction *I = nullptr) const;
1296
1297 /// \return The expected cost of vector Insert and Extract.
1298 /// Use -1 to indicate that there is no information on the index value.
1299 /// This is used when the instruction is not available; a typical use
1300 /// case is to provision the cost of vectorization/scalarization in
1301 /// vectorizer passes.
1302 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1304 unsigned Index = -1, Value *Op0 = nullptr,
1305 Value *Op1 = nullptr) const;
1306
1307 /// \return The expected cost of vector Insert and Extract.
1308 /// This is used when instruction is available, and implementation
1309 /// asserts 'I' is not nullptr.
1310 ///
1311 /// A typical suitable use case is cost estimation when vector instruction
1312 /// exists (e.g., from basic blocks during transformation).
1315 unsigned Index = -1) const;
1316
1317 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1318 /// \p ReplicationFactor times.
1319 ///
1320 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1321 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1322 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1323 int VF,
1324 const APInt &DemandedDstElts,
1326
1327 /// \return The cost of Load and Store instructions.
1329 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1330 unsigned AddressSpace,
1332 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1333 const Instruction *I = nullptr) const;
1334
1335 /// \return The cost of VP Load and Store instructions.
1336 InstructionCost
1337 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1338 unsigned AddressSpace,
1340 const Instruction *I = nullptr) const;
1341
1342 /// \return The cost of masked Load and Store instructions.
1344 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1346
1347 /// \return The cost of Gather or Scatter operation
1348 /// \p Opcode - is a type of memory access Load or Store
1349 /// \p DataTy - a vector type of the data to be loaded or stored
1350 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1351 /// \p VariableMask - true when the memory access is predicated with a mask
1352 /// that is not a compile-time constant
1353 /// \p Alignment - alignment of single element
1354 /// \p I - the optional original context instruction, if one exists, e.g. the
1355 /// load/store to transform or the call to the gather/scatter intrinsic
1357 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1359 const Instruction *I = nullptr) const;
1360
1361 /// \return The cost of the interleaved memory operation.
1362 /// \p Opcode is the memory operation code
1363 /// \p VecTy is the vector type of the interleaved access.
1364 /// \p Factor is the interleave factor
1365 /// \p Indices is the indices for interleaved load members (as interleaved
1366 /// load allows gaps)
1367 /// \p Alignment is the alignment of the memory operation
1368 /// \p AddressSpace is address space of the pointer.
1369 /// \p UseMaskForCond indicates if the memory access is predicated.
1370 /// \p UseMaskForGaps indicates if gaps should be masked.
1371 InstructionCost getInterleavedMemoryOpCost(
1372 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1373 Align Alignment, unsigned AddressSpace,
1374 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1375 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1376
1377 /// A helper function to determine the type of reduction algorithm used
1378 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1379 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1380 return FMF && !(*FMF).allowReassoc();
1381 }
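/// Illustrative sketch of this helper's behaviour: an FP reduction without
/// 'reassoc' must stay ordered, while one with 'reassoc' (or an integer
/// reduction, passed as std::nullopt) may be done tree-wise.
/// \code
///   FastMathFlags FMF;                                   // no flags set
///   assert(TargetTransformInfo::requiresOrderedReduction(FMF));
///   FMF.setAllowReassoc();
///   assert(!TargetTransformInfo::requiresOrderedReduction(FMF));
///   assert(!TargetTransformInfo::requiresOrderedReduction(std::nullopt));
/// \endcode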
1382
1383 /// Calculate the cost of vector reduction intrinsics.
1384 ///
1385 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1386 /// value using the operation denoted by \p Opcode. The FastMathFlags
1387 /// parameter \p FMF indicates what type of reduction we are performing:
1388 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1389 /// involves successively splitting a vector into half and doing the
1390 /// operation on the pair of halves until you have a scalar value. For
1391 /// example:
1392 /// (v0, v1, v2, v3)
1393 /// ((v0+v2), (v1+v3), undef, undef)
1394 /// ((v0+v2+v1+v3), undef, undef, undef)
1395 /// This is the default behaviour for integer operations, whereas for
1396 /// floating point we only do this if \p FMF indicates that
1397 /// reassociation is allowed.
1398 /// 2. Ordered. For a vector with N elements this involves performing N
1399 /// operations in lane order, starting with an initial scalar value, i.e.
1400 /// result = InitVal + v0
1401 /// result = result + v1
1402 /// result = result + v2
1403 /// result = result + v3
1404 /// This is only the case for FP operations and when reassociation is not
1405 /// allowed.
1406 ///
1407 InstructionCost getArithmeticReductionCost(
1408 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1409 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1410
1411 InstructionCost getMinMaxReductionCost(
1412 Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
1413 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1414
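/// Illustrative sketch (assumes `TTI` and a VectorType `VecTy` are in
/// scope): the same FADD reduction costed tree-wise (reassociation allowed)
/// and as a strictly ordered, in-lane-order reduction.
/// \code
///   FastMathFlags Reassoc;
///   Reassoc.setAllowReassoc();
///   InstructionCost TreeWise = TTI.getArithmeticReductionCost(
///       Instruction::FAdd, VecTy, Reassoc, TTI::TCK_RecipThroughput);
///   InstructionCost Ordered = TTI.getArithmeticReductionCost(
///       Instruction::FAdd, VecTy, FastMathFlags(), TTI::TCK_RecipThroughput);
/// \endcode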
1415 /// Calculate the cost of an extended reduction pattern, similar to
1416 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1417 /// extensions. This is the cost of:
1418 /// ResTy vecreduce.add(mul (A, B)).
1419 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1420 InstructionCost getMulAccReductionCost(
1421 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1422 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1423
1424 /// Calculate the cost of an extended reduction pattern, similar to
1425 /// getArithmeticReductionCost of a reduction with an extension.
1426 /// This is the cost of:
1427 /// ResTy vecreduce.opcode(ext(Ty A)).
1428 InstructionCost getExtendedReductionCost(
1429 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1430 FastMathFlags FMF,
1431 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1432
1433 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1434 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1435 /// 3. scalar instruction which is to be vectorized.
1436 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1437 TTI::TargetCostKind CostKind) const;
1438
1439 /// \returns The cost of Call instructions.
1440 InstructionCost getCallInstrCost(
1441 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1442 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1443
1444 /// \returns The number of pieces into which the provided type must be
1445 /// split during legalization. Zero is returned when the answer is unknown.
1446 unsigned getNumberOfParts(Type *Tp) const;
1447
1448 /// \returns The cost of the address computation. For most targets this can be
1449 /// merged into the instruction indexing mode. Some targets might want to
1450 /// distinguish between address computation for memory operations on vector
1451 /// types and scalar types. Such targets should override this function.
1452 /// The 'SE' parameter holds pointer for the scalar evolution object which
1453 /// is used in order to get the Ptr step value in case of constant stride.
1454 /// The 'Ptr' parameter holds SCEV of the access pointer.
1455 InstructionCost getAddressComputationCost(Type *Ty,
1456 ScalarEvolution *SE = nullptr,
1457 const SCEV *Ptr = nullptr) const;
1458
1459 /// \returns The cost, if any, of keeping values of the given types alive
1460 /// over a callsite.
1461 ///
1462 /// Some types may require the use of register classes that do not have
1463 /// any callee-saved registers, so would require a spill and fill.
1464 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1465
1466 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1467 /// will contain additional information - whether the intrinsic may read or
1468 /// write memory, its volatility, and the pointer. Info is undefined
1469 /// if false is returned.
1470 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1471
1472 /// \returns The maximum element size, in bytes, for an element
1473 /// unordered-atomic memory intrinsic.
1474 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1475
1476 /// \returns A value which is the result of the given memory intrinsic. New
1477 /// instructions may be created to extract the result from the given intrinsic
1478 /// memory operation. Returns nullptr if the target cannot create a result
1479 /// from the given intrinsic.
1480 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1481 Type *ExpectedType) const;
1482
1483 /// \returns The type to use in a loop expansion of a memcpy call.
1484 Type *getMemcpyLoopLoweringType(
1485 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1486 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1487 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1488
1489 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1490 /// \param RemainingBytes The number of bytes to copy.
1491 ///
1492 /// Calculates the operand types to use when copying \p RemainingBytes of
1493 /// memory, where source and destination alignments are \p SrcAlign and
1494 /// \p DestAlign respectively.
1495 void getMemcpyLoopResidualLoweringType(
1496 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1497 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1498 unsigned SrcAlign, unsigned DestAlign,
1499 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
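/// Illustrative sketch (assumes `TTI`, an LLVMContext `Ctx`, and the copy
/// parameters `Length`, `SrcAS`, `DstAS`, `SrcAlign`, `DstAlign`, and
/// `RemainingBytes` are in scope): a memcpy-expansion style query picking the
/// main copy type and then the residual operand types for the tail.
/// \code
///   Type *CopyTy = TTI.getMemcpyLoopLoweringType(Ctx, Length, SrcAS, DstAS,
///                                                SrcAlign, DstAlign);
///   SmallVector<Type *, 4> Residual;
///   TTI.getMemcpyLoopResidualLoweringType(Residual, Ctx, RemainingBytes,
///                                         SrcAS, DstAS, SrcAlign, DstAlign);
/// \endcode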
1500
1501 /// \returns True if the two functions have compatible attributes for inlining
1502 /// purposes.
1503 bool areInlineCompatible(const Function *Caller,
1504 const Function *Callee) const;
1505
1506 /// \returns True if the caller and callee agree on how \p Types will be
1507 /// passed to or returned from the callee.
1509 /// \param Types List of types to check.
1510 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1511 const ArrayRef<Type *> &Types) const;
1512
1513 /// The type of load/store indexing.
1514 enum MemIndexedMode {
1515 MIM_Unindexed, ///< No indexing.
1516 MIM_PreInc, ///< Pre-incrementing.
1517 MIM_PreDec, ///< Pre-decrementing.
1518 MIM_PostInc, ///< Post-incrementing.
1519 MIM_PostDec ///< Post-decrementing.
1520 };
1521
1522 /// \returns True if the specified indexed load for the given type is legal.
1523 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1524
1525 /// \returns True if the specified indexed store for the given type is legal.
1526 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1527
1528 /// \returns The bitwidth of the largest vector type that should be used to
1529 /// load/store in the given address space.
1530 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1531
1532 /// \returns True if the load instruction is legal to vectorize.
1533 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1534
1535 /// \returns True if the store instruction is legal to vectorize.
1536 bool isLegalToVectorizeStore(StoreInst *SI) const;
1537
1538 /// \returns True if it is legal to vectorize the given load chain.
1539 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1540 unsigned AddrSpace) const;
1541
1542 /// \returns True if it is legal to vectorize the given store chain.
1543 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1544 unsigned AddrSpace) const;
1545
1546 /// \returns True if it is legal to vectorize the given reduction kind.
1547 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1548 ElementCount VF) const;
1549
1550 /// \returns True if the given type is supported for scalable vectors
1551 bool isElementTypeLegalForScalableVector(Type *Ty) const;
1552
1553 /// \returns The new vector factor value if the target doesn't support \p
1554 /// SizeInBytes loads or has a better vector factor.
1555 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1556 unsigned ChainSizeInBytes,
1557 VectorType *VecTy) const;
1558
1559 /// \returns The new vector factor value if the target doesn't support \p
1560 /// SizeInBytes stores or has a better vector factor.
1561 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1562 unsigned ChainSizeInBytes,
1563 VectorType *VecTy) const;
1564
1565 /// Flags describing the kind of vector reduction.
1566 struct ReductionFlags {
1567 ReductionFlags() = default;
1568 bool IsMaxOp =
1569 false; ///< If the op a min/max kind, true if it's a max operation.
1570 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1571 bool NoNaN =
1572 false; ///< If op is an fp min/max, whether NaNs may be present.
1573 };
1574
1575 /// \returns True if the target prefers in-loop reductions.
1576 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1577 ReductionFlags Flags) const;
1578
1579 /// \returns True if the target prefers the reduction select to be kept in
1580 /// the loop when tail folding, i.e.
1581 /// loop:
1582 /// p = phi (0, s)
1583 /// a = add (p, x)
1584 /// s = select (mask, a, p)
1585 /// vecreduce.add(s)
1586 ///
1587 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1588 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1589 /// by the target, this can lead to cleaner code generation.
1590 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1591 ReductionFlags Flags) const;
1592
1593 /// Return true if the loop vectorizer should consider vectorizing an
1594 /// otherwise scalar epilogue loop.
1595 bool preferEpilogueVectorization() const;
1596
1597 /// \returns True if the target wants to expand the given reduction intrinsic
1598 /// into a shuffle sequence.
1599 bool shouldExpandReduction(const IntrinsicInst *II) const;
1600
1601 /// \returns the size cost of rematerializing a GlobalValue address relative
1602 /// to a stack reload.
1603 unsigned getGISelRematGlobalCost() const;
1604
1605 /// \returns the lower bound of a trip count to decide on vectorization
1606 /// while tail-folding.
1607 unsigned getMinTripCountTailFoldingThreshold() const;
1608
1609 /// \returns True if the target supports scalable vectors.
1610 bool supportsScalableVectors() const;
1611
1612 /// \return true when scalable vectorization is preferred.
1613 bool enableScalableVectorization() const;
1614
1615 /// \name Vector Predication Information
1616 /// @{
1617 /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1618 /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1619 /// Reference - "Vector Predication Intrinsics").
1620 /// Use of %evl is discouraged when that is not the case.
1621 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1622 Align Alignment) const;
1623
1624 struct VPLegalization {
1625 enum VPTransform {
1626 // keep the predicating parameter
1627 Legal = 0,
1628 // where legal, discard the predicate parameter
1629 Discard = 1,
1630 // transform into something else that is also predicating
1631 Convert = 2
1632 };
1633
1634 // How to transform the EVL parameter.
1635 // Legal: keep the EVL parameter as it is.
1636 // Discard: Ignore the EVL parameter where it is safe to do so.
1637 // Convert: Fold the EVL into the mask parameter.
1638 VPTransform EVLParamStrategy;
1639
1640 // How to transform the operator.
1641 // Legal: The target supports this operator.
1642 // Convert: Convert this to a non-VP operation.
1643 // The 'Discard' strategy is invalid.
1644 VPTransform OpStrategy;
1645
1646 bool shouldDoNothing() const {
1647 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1648 }
1649 VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1650 : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1651 };
1652
1653 /// \returns How the target needs this vector-predicated operation to be
1654 /// transformed.
1655 VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1656 /// @}
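/// Illustrative sketch (assumes `TTI` and a visited VPIntrinsic `VPI` are in
/// scope): how an expansion pass might act on the returned strategy.
/// \code
///   TargetTransformInfo::VPLegalization VPStrat =
///       TTI.getVPLegalizationStrategy(VPI);
///   if (VPStrat.shouldDoNothing())
///     return; // target handles %evl and the operator natively
///   // otherwise discard/fold %evl and/or lower to a non-VP operation
/// \endcode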
1657
1658 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1659 /// state.
1660 ///
1661 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1662 /// node containing a jump table in a format suitable for the target, so it
1663 /// needs to know what format of jump table it can legally use.
1664 ///
1665 /// For non-Arm targets, this function isn't used. It defaults to returning
1666 /// false, but it shouldn't matter what it returns anyway.
1667 bool hasArmWideBranch(bool Thumb) const;
1668
1669 /// \return The maximum number of function arguments the target supports.
1670 unsigned getMaxNumArgs() const;
1671
1672 /// @}
1673
1674private:
1675 /// The abstract base class used to type erase specific TTI
1676 /// implementations.
1677 class Concept;
1678
1679 /// The template model for the base class which wraps a concrete
1680 /// implementation in a type erased interface.
1681 template <typename T> class Model;
1682
1683 std::unique_ptr<Concept> TTIImpl;
1684};
1685
1686 class TargetTransformInfo::Concept {
1687 public:
1688 virtual ~Concept() = 0;
1689 virtual const DataLayout &getDataLayout() const = 0;
1690 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1692 Type *AccessType,
1694 virtual InstructionCost
1696 const TTI::PointersChainInfo &Info, Type *AccessTy,
1698 virtual unsigned getInliningThresholdMultiplier() const = 0;
1699 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1700 virtual int getInlinerVectorBonusPercent() const = 0;
1701 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1702 const AllocaInst *AI) const = 0;
1705 virtual unsigned
1707 ProfileSummaryInfo *PSI,
1708 BlockFrequencyInfo *BFI) = 0;
1713 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1714 virtual bool isSourceOfDivergence(const Value *V) = 0;
1715 virtual bool isAlwaysUniform(const Value *V) = 0;
1716 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1717 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1718 virtual unsigned getFlatAddressSpace() = 0;
1720 Intrinsic::ID IID) const = 0;
1721 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1722 virtual bool
1724 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1725 virtual bool isSingleThreaded() const = 0;
1726 virtual std::pair<const Value *, unsigned>
1727 getPredicatedAddrSpace(const Value *V) const = 0;
1729 Value *OldV,
1730 Value *NewV) const = 0;
1731 virtual bool isLoweredToCall(const Function *F) = 0;
1734 OptimizationRemarkEmitter *ORE) = 0;
1736 PeelingPreferences &PP) = 0;
1738 AssumptionCache &AC,
1739 TargetLibraryInfo *LibInfo,
1740 HardwareLoopInfo &HWLoopInfo) = 0;
1742 virtual TailFoldingStyle
1743 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1744 virtual std::optional<Instruction *> instCombineIntrinsic(
1745 InstCombiner &IC, IntrinsicInst &II) = 0;
1746 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1747 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1748 KnownBits & Known, bool &KnownBitsComputed) = 0;
1749 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1750 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1751 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1752 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1753 SimplifyAndSetOp) = 0;
1754 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1755 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1756 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1757 int64_t BaseOffset, bool HasBaseReg,
1758 int64_t Scale, unsigned AddrSpace,
1759 Instruction *I) = 0;
1761 const TargetTransformInfo::LSRCost &C2) = 0;
1762 virtual bool isNumRegsMajorCostOfLSR() = 0;
1764 virtual bool canMacroFuseCmp() = 0;
1765 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1767 TargetLibraryInfo *LibInfo) = 0;
1768 virtual AddressingModeKind
1770 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1771 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1772 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1773 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1774 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1775 ElementCount NumElements) const = 0;
1776 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1777 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1779 Align Alignment) = 0;
1781 Align Alignment) = 0;
1782 virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1783 virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1784 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1785 unsigned Opcode1,
1786 const SmallBitVector &OpcodeMask) const = 0;
1787 virtual bool enableOrderedReductions() = 0;
1788 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1789 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1792 int64_t BaseOffset,
1793 bool HasBaseReg, int64_t Scale,
1794 unsigned AddrSpace) = 0;
1795 virtual bool LSRWithInstrQueries() = 0;
1796 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1798 virtual bool useAA() = 0;
1799 virtual bool isTypeLegal(Type *Ty) = 0;
1800 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1801 virtual bool shouldBuildLookupTables() = 0;
1803 virtual bool shouldBuildRelLookupTables() = 0;
1804 virtual bool useColdCCForColdCall(Function &F) = 0;
1806 const APInt &DemandedElts,
1807 bool Insert, bool Extract,
1809 virtual InstructionCost
1811 ArrayRef<Type *> Tys,
1814 virtual bool supportsTailCalls() = 0;
1815 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1816 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1818 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1819 virtual bool enableSelectOptimize() = 0;
1824 unsigned BitWidth,
1825 unsigned AddressSpace,
1826 Align Alignment,
1827 unsigned *Fast) = 0;
1828 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1829 virtual bool haveFastSqrt(Type *Ty) = 0;
1831 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1833 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1834 const APInt &Imm, Type *Ty) = 0;
1835 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1837 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1838 const APInt &Imm, Type *Ty,
1840 Instruction *Inst = nullptr) = 0;
1842 const APInt &Imm, Type *Ty,
1844 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1845 virtual unsigned getRegisterClassForType(bool Vector,
1846 Type *Ty = nullptr) const = 0;
1847 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1849 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1850 virtual std::optional<unsigned> getMaxVScale() const = 0;
1851 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1852 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1853 virtual bool
1855 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1856 bool IsScalable) const = 0;
1857 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1858 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1859 Type *ScalarValTy) const = 0;
1861 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1862 virtual unsigned getCacheLineSize() const = 0;
1863 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1864 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1865 const = 0;
1866
1867 /// \return How much before a load we should place the prefetch
1868 /// instruction. This is currently measured in number of
1869 /// instructions.
1870 virtual unsigned getPrefetchDistance() const = 0;
1871
1872 /// \return Some HW prefetchers can handle accesses up to a certain
1873 /// constant stride. This is the minimum stride in bytes where it
1874 /// makes sense to start adding SW prefetches. The default is 1,
1875 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1876 /// even below the HW prefetcher limit, and the arguments provided are
1877 /// meant to serve as a basis for deciding this for a particular loop.
1878 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1879 unsigned NumStridedMemAccesses,
1880 unsigned NumPrefetches,
1881 bool HasCall) const = 0;
1882
1883 /// \return The maximum number of iterations to prefetch ahead. If
1884 /// the required number of iterations is more than this number, no
1885 /// prefetching is performed.
1886 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1887
1888 /// \return True if prefetching should also be done for writes.
1889 virtual bool enableWritePrefetching() const = 0;
1890
1891 /// \return if target want to issue a prefetch in address space \p AS.
1892 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1893
1894 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
1896 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1897 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
1898 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1899
1901 ArrayRef<int> Mask,
1903 int Index, VectorType *SubTp,
1904 ArrayRef<const Value *> Args) = 0;
1905 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1906 Type *Src, CastContextHint CCH,
1908 const Instruction *I) = 0;
1909 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1910 VectorType *VecTy,
1911 unsigned Index) = 0;
1912 virtual InstructionCost getCFInstrCost(unsigned Opcode,
1914 const Instruction *I = nullptr) = 0;
1915 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1916 Type *CondTy,
1917 CmpInst::Predicate VecPred,
1919 const Instruction *I) = 0;
1920 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1922 unsigned Index, Value *Op0,
1923 Value *Op1) = 0;
1926 unsigned Index) = 0;
1927
1928 virtual InstructionCost
1929 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1930 const APInt &DemandedDstElts,
1932
1933 virtual InstructionCost
1934 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1936 OperandValueInfo OpInfo, const Instruction *I) = 0;
1937 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1938 Align Alignment,
1939 unsigned AddressSpace,
1941 const Instruction *I) = 0;
1942 virtual InstructionCost
1943 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1944 unsigned AddressSpace,
1946 virtual InstructionCost
1947 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1948 bool VariableMask, Align Alignment,
1950 const Instruction *I = nullptr) = 0;
1951
1953 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1954 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1955 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1956 virtual InstructionCost
1958 std::optional<FastMathFlags> FMF,
1960 virtual InstructionCost
1964 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1965 FastMathFlags FMF,
1968 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1970 virtual InstructionCost
1974 ArrayRef<Type *> Tys,
1976 virtual unsigned getNumberOfParts(Type *Tp) = 0;
1977 virtual InstructionCost
1979 virtual InstructionCost
1982 MemIntrinsicInfo &Info) = 0;
1983 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1985 Type *ExpectedType) = 0;
1987 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1988 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1989 std::optional<uint32_t> AtomicElementSize) const = 0;
1990
1992 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1993 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1994 unsigned SrcAlign, unsigned DestAlign,
1995 std::optional<uint32_t> AtomicCpySize) const = 0;
1996 virtual bool areInlineCompatible(const Function *Caller,
1997 const Function *Callee) const = 0;
1998 virtual bool areTypesABICompatible(const Function *Caller,
1999 const Function *Callee,
2000 const ArrayRef<Type *> &Types) const = 0;
2001 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2002 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2003 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2004 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2005 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2006 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2007 Align Alignment,
2008 unsigned AddrSpace) const = 0;
2009 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2010 Align Alignment,
2011 unsigned AddrSpace) const = 0;
2013 ElementCount VF) const = 0;
2014 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2015 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2016 unsigned ChainSizeInBytes,
2017 VectorType *VecTy) const = 0;
2018 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2019 unsigned ChainSizeInBytes,
2020 VectorType *VecTy) const = 0;
2021 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2022 ReductionFlags) const = 0;
2023 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2024 ReductionFlags) const = 0;
2025 virtual bool preferEpilogueVectorization() const = 0;
2026
2027 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2028 virtual unsigned getGISelRematGlobalCost() const = 0;
2029 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2030 virtual bool enableScalableVectorization() const = 0;
2031 virtual bool supportsScalableVectors() const = 0;
2032 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2033 Align Alignment) const = 0;
2034 virtual VPLegalization
2036 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2037 virtual unsigned getMaxNumArgs() const = 0;
2038};
2039
2040template <typename T>
2041class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2042 T Impl;
2043
2044public:
2045 Model(T Impl) : Impl(std::move(Impl)) {}
2046 ~Model() override = default;
2047
2048 const DataLayout &getDataLayout() const override {
2049 return Impl.getDataLayout();
2050 }
2051
2052 InstructionCost
2053 getGEPCost(Type *PointeeType, const Value *Ptr,
2054 ArrayRef<const Value *> Operands, Type *AccessType,
2056 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2057 }
2058 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2059 const Value *Base,
2060 const PointersChainInfo &Info,
2061 Type *AccessTy,
2062 TargetCostKind CostKind) override {
2063 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2064 }
2065 unsigned getInliningThresholdMultiplier() const override {
2066 return Impl.getInliningThresholdMultiplier();
2067 }
2068 unsigned adjustInliningThreshold(const CallBase *CB) override {
2069 return Impl.adjustInliningThreshold(CB);
2070 }
2071 int getInlinerVectorBonusPercent() const override {
2072 return Impl.getInlinerVectorBonusPercent();
2073 }
2074 unsigned getCallerAllocaCost(const CallBase *CB,
2075 const AllocaInst *AI) const override {
2076 return Impl.getCallerAllocaCost(CB, AI);
2077 }
2078 InstructionCost getMemcpyCost(const Instruction *I) override {
2079 return Impl.getMemcpyCost(I);
2080 }
2081
2082 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2083 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2084 }
2085
2086 InstructionCost getInstructionCost(const User *U,
2087 ArrayRef<const Value *> Operands,
2088 TargetCostKind CostKind) override {
2089 return Impl.getInstructionCost(U, Operands, CostKind);
2090 }
2091 BranchProbability getPredictableBranchThreshold() override {
2092 return Impl.getPredictableBranchThreshold();
2093 }
2094 bool hasBranchDivergence(const Function *F = nullptr) override {
2095 return Impl.hasBranchDivergence(F);
2096 }
2097 bool isSourceOfDivergence(const Value *V) override {
2098 return Impl.isSourceOfDivergence(V);
2099 }
2100
2101 bool isAlwaysUniform(const Value *V) override {
2102 return Impl.isAlwaysUniform(V);
2103 }
2104
2105 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2106 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2107 }
2108
2109 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2110 return Impl.addrspacesMayAlias(AS0, AS1);
2111 }
2112
2113 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2114
2115 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2116 Intrinsic::ID IID) const override {
2117 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2118 }
2119
2120 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2121 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2122 }
2123
2124 bool
2125 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2126 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2127 }
2128
2129 unsigned getAssumedAddrSpace(const Value *V) const override {
2130 return Impl.getAssumedAddrSpace(V);
2131 }
2132
2133 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2134
2135 std::pair<const Value *, unsigned>
2136 getPredicatedAddrSpace(const Value *V) const override {
2137 return Impl.getPredicatedAddrSpace(V);
2138 }
2139
2140 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2141 Value *NewV) const override {
2142 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2143 }
2144
2145 bool isLoweredToCall(const Function *F) override {
2146 return Impl.isLoweredToCall(F);
2147 }
2148 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2149 UnrollingPreferences &UP,
2150 OptimizationRemarkEmitter *ORE) override {
2151 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2152 }
2153 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2154 PeelingPreferences &PP) override {
2155 return Impl.getPeelingPreferences(L, SE, PP);
2156 }
2157 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2158 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2159 HardwareLoopInfo &HWLoopInfo) override {
2160 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2161 }
2162 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2163 return Impl.preferPredicateOverEpilogue(TFI);
2164 }
2165 TailFoldingStyle
2166 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2167 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2168 }
2169 std::optional<Instruction *>
2170 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2171 return Impl.instCombineIntrinsic(IC, II);
2172 }
2173 std::optional<Value *>
2174 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2175 APInt DemandedMask, KnownBits &Known,
2176 bool &KnownBitsComputed) override {
2177 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2178 KnownBitsComputed);
2179 }
2180 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2181 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2182 APInt &UndefElts2, APInt &UndefElts3,
2183 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2184 SimplifyAndSetOp) override {
2185 return Impl.simplifyDemandedVectorEltsIntrinsic(
2186 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2187 SimplifyAndSetOp);
2188 }
2189 bool isLegalAddImmediate(int64_t Imm) override {
2190 return Impl.isLegalAddImmediate(Imm);
2191 }
2192 bool isLegalICmpImmediate(int64_t Imm) override {
2193 return Impl.isLegalICmpImmediate(Imm);
2194 }
2195 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2196 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2197 Instruction *I) override {
2198 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2199 AddrSpace, I);
2200 }
2201 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2202 const TargetTransformInfo::LSRCost &C2) override {
2203 return Impl.isLSRCostLess(C1, C2);
2204 }
2205 bool isNumRegsMajorCostOfLSR() override {
2206 return Impl.isNumRegsMajorCostOfLSR();
2207 }
2208 bool isProfitableLSRChainElement(Instruction *I) override {
2209 return Impl.isProfitableLSRChainElement(I);
2210 }
2211 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2212 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2213 DominatorTree *DT, AssumptionCache *AC,
2214 TargetLibraryInfo *LibInfo) override {
2215 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2216 }
2217 AddressingModeKind
2218 getPreferredAddressingMode(const Loop *L,
2219 ScalarEvolution *SE) const override {
2220 return Impl.getPreferredAddressingMode(L, SE);
2221 }
2222 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2223 return Impl.isLegalMaskedStore(DataType, Alignment);
2224 }
2225 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2226 return Impl.isLegalMaskedLoad(DataType, Alignment);
2227 }
2228 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2229 return Impl.isLegalNTStore(DataType, Alignment);
2230 }
2231 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2232 return Impl.isLegalNTLoad(DataType, Alignment);
2233 }
2234 bool isLegalBroadcastLoad(Type *ElementTy,
2235 ElementCount NumElements) const override {
2236 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2237 }
2238 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2239 return Impl.isLegalMaskedScatter(DataType, Alignment);
2240 }
2241 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2242 return Impl.isLegalMaskedGather(DataType, Alignment);
2243 }
2244 bool forceScalarizeMaskedGather(VectorType *DataType,
2245 Align Alignment) override {
2246 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2247 }
2248 bool forceScalarizeMaskedScatter(VectorType *DataType,
2249 Align Alignment) override {
2250 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2251 }
2252 bool isLegalMaskedCompressStore(Type *DataType) override {
2253 return Impl.isLegalMaskedCompressStore(DataType);
2254 }
2255 bool isLegalMaskedExpandLoad(Type *DataType) override {
2256 return Impl.isLegalMaskedExpandLoad(DataType);
2257 }
2258 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2259 const SmallBitVector &OpcodeMask) const override {
2260 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2261 }
2262 bool enableOrderedReductions() override {
2263 return Impl.enableOrderedReductions();
2264 }
2265 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2266 return Impl.hasDivRemOp(DataType, IsSigned);
2267 }
2268 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2269 return Impl.hasVolatileVariant(I, AddrSpace);
2270 }
2271 bool prefersVectorizedAddressing() override {
2272 return Impl.prefersVectorizedAddressing();
2273 }
2274 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2275 int64_t BaseOffset, bool HasBaseReg,
2276 int64_t Scale,
2277 unsigned AddrSpace) override {
2278 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2279 AddrSpace);
2280 }
2281 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2282 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2283 return Impl.isTruncateFree(Ty1, Ty2);
2284 }
2285 bool isProfitableToHoist(Instruction *I) override {
2286 return Impl.isProfitableToHoist(I);
2287 }
2288 bool useAA() override { return Impl.useAA(); }
2289 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2290 unsigned getRegUsageForType(Type *Ty) override {
2291 return Impl.getRegUsageForType(Ty);
2292 }
2293 bool shouldBuildLookupTables() override {
2294 return Impl.shouldBuildLookupTables();
2295 }
2296 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2297 return Impl.shouldBuildLookupTablesForConstant(C);
2298 }
2299 bool shouldBuildRelLookupTables() override {
2300 return Impl.shouldBuildRelLookupTables();
2301 }
2302 bool useColdCCForColdCall(Function &F) override {
2303 return Impl.useColdCCForColdCall(F);
2304 }
2305
2306 InstructionCost getScalarizationOverhead(VectorType *Ty,
2307 const APInt &DemandedElts,
2308 bool Insert, bool Extract,
2309 TargetCostKind CostKind) override {
2310 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2311 CostKind);
2312 }
2313 InstructionCost
2314 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2315 ArrayRef<Type *> Tys,
2316 TargetCostKind CostKind) override {
2317 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2318 }
2319
2320 bool supportsEfficientVectorElementLoadStore() override {
2321 return Impl.supportsEfficientVectorElementLoadStore();
2322 }
2323
2324 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2325 bool supportsTailCallFor(const CallBase *CB) override {
2326 return Impl.supportsTailCallFor(CB);
2327 }
2328
2329 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2330 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2331 }
2332 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2333 bool IsZeroCmp) const override {
2334 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2335 }
2336 bool enableInterleavedAccessVectorization() override {
2337 return Impl.enableInterleavedAccessVectorization();
2338 }
2339 bool enableSelectOptimize() override {
2340 return Impl.enableSelectOptimize();
2341 }
2342 bool enableMaskedInterleavedAccessVectorization() override {
2343 return Impl.enableMaskedInterleavedAccessVectorization();
2344 }
2345 bool isFPVectorizationPotentiallyUnsafe() override {
2346 return Impl.isFPVectorizationPotentiallyUnsafe();
2347 }
2348 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2349 unsigned AddressSpace, Align Alignment,
2350 unsigned *Fast) override {
2351 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2352 Alignment, Fast);
2353 }
2354 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2355 return Impl.getPopcntSupport(IntTyWidthInBit);
2356 }
2357 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2358
2359 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2360 return Impl.isExpensiveToSpeculativelyExecute(I);
2361 }
2362
2363 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2364 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2365 }
2366
2367 InstructionCost getFPOpCost(Type *Ty) override {
2368 return Impl.getFPOpCost(Ty);
2369 }
2370
2371 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2372 const APInt &Imm, Type *Ty) override {
2373 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2374 }
2375 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2376 TargetCostKind CostKind) override {
2377 return Impl.getIntImmCost(Imm, Ty, CostKind);
2378 }
2379 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2380 const APInt &Imm, Type *Ty,
2381 TargetCostKind CostKind,
2382 Instruction *Inst = nullptr) override {
2383 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2384 }
2385 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2386 const APInt &Imm, Type *Ty,
2387 TargetCostKind CostKind) override {
2388 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2389 }
2390 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2391 return Impl.getNumberOfRegisters(ClassID);
2392 }
2393 unsigned getRegisterClassForType(bool Vector,
2394 Type *Ty = nullptr) const override {
2395 return Impl.getRegisterClassForType(Vector, Ty);
2396 }
2397 const char *getRegisterClassName(unsigned ClassID) const override {
2398 return Impl.getRegisterClassName(ClassID);
2399 }
2400 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2401 return Impl.getRegisterBitWidth(K);
2402 }
2403 unsigned getMinVectorRegisterBitWidth() const override {
2404 return Impl.getMinVectorRegisterBitWidth();
2405 }
2406 std::optional<unsigned> getMaxVScale() const override {
2407 return Impl.getMaxVScale();
2408 }
2409 std::optional<unsigned> getVScaleForTuning() const override {
2410 return Impl.getVScaleForTuning();
2411 }
2412 bool isVScaleKnownToBeAPowerOfTwo() const override {
2413 return Impl.isVScaleKnownToBeAPowerOfTwo();
2414 }
2415 bool shouldMaximizeVectorBandwidth(
2416 TargetTransformInfo::RegisterKind K) const override {
2417 return Impl.shouldMaximizeVectorBandwidth(K);
2418 }
2419 ElementCount getMinimumVF(unsigned ElemWidth,
2420 bool IsScalable) const override {
2421 return Impl.getMinimumVF(ElemWidth, IsScalable);
2422 }
2423 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2424 return Impl.getMaximumVF(ElemWidth, Opcode);
2425 }
2426 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2427 Type *ScalarValTy) const override {
2428 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2429 }
2430 bool shouldConsiderAddressTypePromotion(
2431 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2432 return Impl.shouldConsiderAddressTypePromotion(
2433 I, AllowPromotionWithoutCommonHeader);
2434 }
2435 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2436 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2437 return Impl.getCacheSize(Level);
2438 }
2439 std::optional<unsigned>
2440 getCacheAssociativity(CacheLevel Level) const override {
2441 return Impl.getCacheAssociativity(Level);
2442 }
2443
2444 /// Return the preferred prefetch distance in terms of instructions.
2445 ///
2446 unsigned getPrefetchDistance() const override {
2447 return Impl.getPrefetchDistance();
2448 }
2449
2450 /// Return the minimum stride necessary to trigger software
2451 /// prefetching.
2452 ///
2453 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2454 unsigned NumStridedMemAccesses,
2455 unsigned NumPrefetches,
2456 bool HasCall) const override {
2457 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2458 NumPrefetches, HasCall);
2459 }
2460
2461 /// Return the maximum prefetch distance in terms of loop
2462 /// iterations.
2463 ///
2464 unsigned getMaxPrefetchIterationsAhead() const override {
2465 return Impl.getMaxPrefetchIterationsAhead();
2466 }
2467
2468 /// \return True if prefetching should also be done for writes.
2469 bool enableWritePrefetching() const override {
2470 return Impl.enableWritePrefetching();
2471 }
2472
2473 /// \return if target want to issue a prefetch in address space \p AS.
2474 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2475 return Impl.shouldPrefetchAddressSpace(AS);
2476 }
2477
2478 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2479 return Impl.getMaxInterleaveFactor(VF);
2480 }
2481 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2482 unsigned &JTSize,
2483 ProfileSummaryInfo *PSI,
2484 BlockFrequencyInfo *BFI) override {
2485 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2486 }
2487 InstructionCost getArithmeticInstrCost(
2488 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2489 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2490 ArrayRef<const Value *> Args,
2491 const Instruction *CxtI = nullptr) override {
2492 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2493 Args, CxtI);
2494 }
2495
2496 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2497 ArrayRef<int> Mask,
2498 TTI::TargetCostKind CostKind, int Index,
2499 VectorType *SubTp,
2500 ArrayRef<const Value *> Args) override {
2501 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2502 }
2503 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2504 CastContextHint CCH,
2505 TTI::TargetCostKind CostKind,
2506 const Instruction *I) override {
2507 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2508 }
2509 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2510 VectorType *VecTy,
2511 unsigned Index) override {
2512 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2513 }
2514 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2515 const Instruction *I = nullptr) override {
2516 return Impl.getCFInstrCost(Opcode, CostKind, I);
2517 }
2518 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2519 CmpInst::Predicate VecPred,
2520 TTI::TargetCostKind CostKind,
2521 const Instruction *I) override {
2522 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2523 }
2524 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2525 TTI::TargetCostKind CostKind,
2526 unsigned Index, Value *Op0,
2527 Value *Op1) override {
2528 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2529 }
2530 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2531 TTI::TargetCostKind CostKind,
2532 unsigned Index) override {
2533 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2534 }
2535 InstructionCost
2536 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2537 const APInt &DemandedDstElts,
2538 TTI::TargetCostKind CostKind) override {
2539 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2540 DemandedDstElts, CostKind);
2541 }
2542 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2543 unsigned AddressSpace,
2544 TTI::TargetCostKind CostKind,
2545 OperandValueInfo OpInfo,
2546 const Instruction *I) override {
2547 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2548 OpInfo, I);
2549 }
2550 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2551 unsigned AddressSpace,
2552 TTI::TargetCostKind CostKind,
2553 const Instruction *I) override {
2554 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2555 CostKind, I);
2556 }
2557 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2558 Align Alignment, unsigned AddressSpace,
2559 TTI::TargetCostKind CostKind) override {
2560 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2561 CostKind);
2562 }
2563 InstructionCost
2564 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2565 bool VariableMask, Align Alignment,
2566 TTI::TargetCostKind CostKind,
2567 const Instruction *I = nullptr) override {
2568 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2569 Alignment, CostKind, I);
2570 }
2571 InstructionCost getInterleavedMemoryOpCost(
2572 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2573 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2574 bool UseMaskForCond, bool UseMaskForGaps) override {
2575 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2576 Alignment, AddressSpace, CostKind,
2577 UseMaskForCond, UseMaskForGaps);
2578 }
2579 InstructionCost
2580 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2581 std::optional<FastMathFlags> FMF,
2582 TTI::TargetCostKind CostKind) override {
2583 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2584 }
2585 InstructionCost
2586 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2587 TTI::TargetCostKind CostKind) override {
2588 return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
2589 }
2590 InstructionCost
2591 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2592 VectorType *Ty, FastMathFlags FMF,
2593 TTI::TargetCostKind CostKind) override {
2594 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2595 CostKind);
2596 }
2597 InstructionCost
2598 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2599 TTI::TargetCostKind CostKind) override {
2600 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2601 }
2602 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2603 TTI::TargetCostKind CostKind) override {
2604 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2605 }
2606 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2607 ArrayRef<Type *> Tys,
2608 TTI::TargetCostKind CostKind) override {
2609 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2610 }
2611 unsigned getNumberOfParts(Type *Tp) override {
2612 return Impl.getNumberOfParts(Tp);
2613 }
2614 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2615 const SCEV *Ptr) override {
2616 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2617 }
2618 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2619 return Impl.getCostOfKeepingLiveOverCall(Tys);
2620 }
2621 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2622 MemIntrinsicInfo &Info) override {
2623 return Impl.getTgtMemIntrinsic(Inst, Info);
2624 }
2625 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2626 return Impl.getAtomicMemIntrinsicMaxElementSize();
2627 }
2628 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2629 Type *ExpectedType) override {
2630 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2631 }
2632 Type *getMemcpyLoopLoweringType(
2633 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2634 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2635 std::optional<uint32_t> AtomicElementSize) const override {
2636 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2637 DestAddrSpace, SrcAlign, DestAlign,
2638 AtomicElementSize);
2639 }
2640 void getMemcpyLoopResidualLoweringType(
2641 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2642 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2643 unsigned SrcAlign, unsigned DestAlign,
2644 std::optional<uint32_t> AtomicCpySize) const override {
2645 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2646 SrcAddrSpace, DestAddrSpace,
2647 SrcAlign, DestAlign, AtomicCpySize);
2648 }
2649 bool areInlineCompatible(const Function *Caller,
2650 const Function *Callee) const override {
2651 return Impl.areInlineCompatible(Caller, Callee);
2652 }
2653 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2654 const ArrayRef<Type *> &Types) const override {
2655 return Impl.areTypesABICompatible(Caller, Callee, Types);
2656 }
2657 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2658 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2659 }
2660 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2661 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2662 }
2663 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2664 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2665 }
2666 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2667 return Impl.isLegalToVectorizeLoad(LI);
2668 }
2669 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2670 return Impl.isLegalToVectorizeStore(SI);
2671 }
2672 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2673 unsigned AddrSpace) const override {
2674 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2675 AddrSpace);
2676 }
2677 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2678 unsigned AddrSpace) const override {
2679 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2680 AddrSpace);
2681 }
2682 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2683 ElementCount VF) const override {
2684 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2685 }
2686 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2687 return Impl.isElementTypeLegalForScalableVector(Ty);
2688 }
2689 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2690 unsigned ChainSizeInBytes,
2691 VectorType *VecTy) const override {
2692 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2693 }
2694 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2695 unsigned ChainSizeInBytes,
2696 VectorType *VecTy) const override {
2697 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2698 }
2699 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2700 ReductionFlags Flags) const override {
2701 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2702 }
2703 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2704 ReductionFlags Flags) const override {
2705 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2706 }
2707 bool preferEpilogueVectorization() const override {
2708 return Impl.preferEpilogueVectorization();
2709 }
2710
2711 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2712 return Impl.shouldExpandReduction(II);
2713 }
2714
2715 unsigned getGISelRematGlobalCost() const override {
2716 return Impl.getGISelRematGlobalCost();
2717 }
2718
2719 unsigned getMinTripCountTailFoldingThreshold() const override {
2720 return Impl.getMinTripCountTailFoldingThreshold();
2721 }
2722
2723 bool supportsScalableVectors() const override {
2724 return Impl.supportsScalableVectors();
2725 }
2726
2727 bool enableScalableVectorization() const override {
2728 return Impl.enableScalableVectorization();
2729 }
2730
2731 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2732 Align Alignment) const override {
2733 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2734 }
2735
2736 VPLegalization
2737 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2738 return Impl.getVPLegalizationStrategy(PI);
2739 }
2740
2741 bool hasArmWideBranch(bool Thumb) const override {
2742 return Impl.hasArmWideBranch(Thumb);
2743 }
2744
2745 unsigned getMaxNumArgs() const override {
2746 return Impl.getMaxNumArgs();
2747 }
2748};
2749
2750template <typename T>
2751 TargetTransformInfo::TargetTransformInfo(T Impl)
2752 : TTIImpl(new Model<T>(Impl)) {}
2753
2754/// Analysis pass providing the \c TargetTransformInfo.
2755///
2756/// The core idea of the TargetIRAnalysis is to expose an interface through
2757/// which LLVM targets can analyze and provide information about the middle
2758/// end's target-independent IR. This supports use cases such as target-aware
2759/// cost modeling of IR constructs.
2760///
2761/// This is a function analysis because much of the cost modeling for targets
2762/// is done in a subtarget specific way and LLVM supports compiling different
2763/// functions targeting different subtargets in order to support runtime
2764/// dispatch according to the observed subtarget.
2765class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2766public:
2767 typedef TargetTransformInfo Result;
2768
2769 /// Default construct a target IR analysis.
2770 ///
2771 /// This will use the module's datalayout to construct a baseline
2772 /// conservative TTI result.
2773 TargetIRAnalysis();
2774
2775 /// Construct an IR analysis pass around a target-provide callback.
2776 ///
2777 /// The callback will be called with a particular function for which the TTI
2778 /// is needed and must return a TTI object for that function.
2779 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
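/// Illustrative sketch of new-pass-manager usage (assumes a Function `F` is
/// in scope; a real pipeline would register the target's callback obtained
/// from its TargetMachine rather than the default-constructed analysis):
/// \code
///   FunctionAnalysisManager FAM;
///   FAM.registerPass([] { return TargetIRAnalysis(); });
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
/// \endcode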
2780
2781 // Value semantics. We spell out the constructors for MSVC.
2782 TargetIRAnalysis(const TargetIRAnalysis &Arg)
2783 : TTICallback(Arg.TTICallback) {}
2784 TargetIRAnalysis(TargetIRAnalysis &&Arg)
2785 : TTICallback(std::move(Arg.TTICallback)) {}
2786 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2787 TTICallback = RHS.TTICallback;
2788 return *this;
2789 }
2790 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2791 TTICallback = std::move(RHS.TTICallback);
2792 return *this;
2793 }
2794
2795 Result run(const Function &F, FunctionAnalysisManager &);
2796
2797private:
2798 friend AnalysisInfoMixin<TargetIRAnalysis>;
2799 static AnalysisKey Key;
2800
2801 /// The callback used to produce a result.
2802 ///
2803 /// We use a completely opaque callback so that targets can provide whatever
2804 /// mechanism they desire for constructing the TTI for a given function.
2805 ///
2806 /// FIXME: Should we really use std::function? It's relatively inefficient.
2807 /// It might be possible to arrange for even stateful callbacks to outlive
2808 /// the analysis and thus use a function_ref which would be lighter weight.
2809 /// This may also be less error prone as the callback is likely to reference
2810 /// the external TargetMachine, and that reference needs to never dangle.
2811 std::function<Result(const Function &)> TTICallback;
2812
2813 /// Helper function used as the callback in the default constructor.
2814 static Result getDefaultTTI(const Function &F);
2815};
2816
2817/// Wrapper pass for TargetTransformInfo.
2818///
2819/// This pass can be constructed from a TTI object which it stores internally
2820/// and is queried by passes.
2821 class TargetTransformInfoWrapperPass : public ImmutablePass {
2822 TargetIRAnalysis TIRA;
2823 std::optional<TargetTransformInfo> TTI;
2824
2825 virtual void anchor();
2826
2827public:
2828 static char ID;
2829
2830 /// We must provide a default constructor for the pass but it should
2831 /// never be used.
2832 ///
2833 /// Use the constructor below or call one of the creation routines.
2834 TargetTransformInfoWrapperPass();
2835
2836 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2837
2838 TargetTransformInfo &getTTI(const Function &F);
2839 };
2840
2841/// Create an analysis pass wrapper around a TTI object.
2842///
2843/// This analysis pass just holds the TTI instance and makes it available to
2844/// clients.
2845 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2846
2847} // namespace llvm
2848
2849#endif
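
Usage sketch (not part of this header): under the new pass manager, a function pass obtains the result of TargetIRAnalysis, which is a TargetTransformInfo per the typedef above, from its FunctionAnalysisManager and queries the cost model. The pass name CostPrinterPass and the cost-summing loop are purely illustrative; registering the pass with a PassBuilder is omitted.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"

namespace {
// Hypothetical pass: sums the reciprocal-throughput cost of every instruction.
struct CostPrinterPass : llvm::PassInfoMixin<CostPrinterPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    // TargetIRAnalysis::Result is TargetTransformInfo.
    const llvm::TargetTransformInfo &TTI =
        AM.getResult<llvm::TargetIRAnalysis>(F);
    llvm::InstructionCost Total = 0;
    for (llvm::Instruction &I : llvm::instructions(F))
      Total += TTI.getInstructionCost(
          &I, llvm::TargetTransformInfo::TCK_RecipThroughput);
    llvm::errs() << F.getName() << ": estimated cost = " << Total << "\n";
    return llvm::PreservedAnalyses::all(); // Analysis only; IR is unchanged.
  }
};
} // namespace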
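
A second sketch (again not part of this header) exercises the std::function-based constructor declared above: a TargetIRAnalysis is built around a callback that asks a TargetMachine for each function's TTI and is then registered with a FunctionAnalysisManager. It assumes the TargetMachine outlives the analysis manager and exposes getTargetTransformInfo(const Function &); in-tree drivers usually obtain an equivalent analysis directly from the TargetMachine rather than spelling out the lambda.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Target/TargetMachine.h"

// Assumed helper: TM must outlive FAM, since the callback keeps a reference.
static void registerTargetTTI(llvm::TargetMachine &TM,
                              llvm::FunctionAnalysisManager &FAM) {
  // The callback is invoked per function and must return that function's TTI.
  // A default-constructed TargetIRAnalysis() would instead produce the
  // conservative, DataLayout-only baseline result.
  llvm::TargetIRAnalysis TIRA(
      [&TM](const llvm::Function &F) { return TM.getTargetTransformInfo(F); });
  // registerPass copies the analysis into the manager immediately.
  FAM.registerPass([&] { return TIRA; });
}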
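
A final sketch (not part of this header) covers the legacy-pass-manager path through TargetTransformInfoWrapperPass: the pass declares its dependency in getAnalysisUsage and calls getTTI(F) at run time. LegacyCostQueryPass is a hypothetical name, the usual INITIALIZE_PASS registration boilerplate is omitted, and drivers typically seed the pipeline with createTargetTransformInfoWrapperPass(...) as declared above.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"

namespace {
// Hypothetical legacy pass; INITIALIZE_PASS registration is omitted.
struct LegacyCostQueryPass : llvm::FunctionPass {
  static char ID;
  LegacyCostQueryPass() : llvm::FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(llvm::Function &F) override {
    // The wrapper hands out a per-function TTI, mirroring TargetIRAnalysis.
    llvm::TargetTransformInfo &TTI =
        getAnalysis<llvm::TargetTransformInfoWrapperPass>().getTTI(F);
    // Example query: does the target have a fast sqrt for 'float'?
    (void)TTI.haveFastSqrt(llvm::Type::getFloatTy(F.getContext()));
    return false; // No IR changes.
  }
};
char LegacyCostQueryPass::ID = 0;
} // namespace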