LLVM 23.0.0git
TargetTransformInfo.h
Go to the documentation of this file.
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/Uniformity.h"
30#include "llvm/IR/FMF.h"
31#include "llvm/IR/InstrTypes.h"
32#include "llvm/IR/PassManager.h"
33#include "llvm/Pass.h"
38#include <functional>
39#include <optional>
40#include <utility>
41
42namespace llvm {
43
// Declares Intrinsic::ID as an alias for unsigned so that the declarations in
// this header can name the intrinsic identifier type.
44namespace Intrinsic {
45typedef unsigned ID;
46}
47
48class AllocaInst;
49class AssumptionCache;
51class DominatorTree;
52class CondBrInst;
53class Function;
54class GlobalValue;
55class InstCombiner;
58class IntrinsicInst;
59class LoadInst;
60class Loop;
61class LoopInfo;
65class SCEV;
66class ScalarEvolution;
67class SmallBitVector;
68class StoreInst;
69class SwitchInst;
71class Type;
72class VPIntrinsic;
73struct KnownBits;
74
75/// Information about a load/store intrinsic defined by the target.
77 /// This is the pointer that the intrinsic is loading from or storing to.
78 /// If this is non-null, then analysis/optimization passes can assume that
79 /// this intrinsic is functionally equivalent to a load/store from this
80 /// pointer.
81 Value *PtrVal = nullptr;
82
83 // Ordering for atomic operations.
85
86 // Same Id is set by the target for corresponding load/store intrinsics.
87 unsigned short MatchingId = 0;
88
89 bool ReadMem = false;
90 bool WriteMem = false;
91 bool IsVolatile = false;
92
94
100};
101
102/// Attributes of a target dependent hardware loop.
106 Loop *L = nullptr;
109 const SCEV *ExitCount = nullptr;
111 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
112 // value in every iteration.
113 bool IsNestingLegal = false; // Can a hardware loop be a parent to
114 // another hardware loop?
115 bool CounterInReg = false; // Should loop counter be updated in
116 // the loop via a phi?
117 bool PerformEntryTest = false; // Generate the intrinsic which also performs
118 // icmp ne zero on the loop counter value and
119 // produces an i1 to guard the loop entry.
121 DominatorTree &DT,
122 bool ForceNestedLoop = false,
123 bool ForceHardwareLoopPHI = false);
124 LLVM_ABI bool canAnalyze(LoopInfo &LI);
125};
126
127/// Information for memory intrinsic cost model.
129 /// Optional context instruction, if one exists, e.g. the
130 /// load/store to transform to the intrinsic.
131 const Instruction *I = nullptr;
132
133 /// Address in memory.
134 const Value *Ptr = nullptr;
135
136 /// Vector type of the data to be loaded or stored.
137 Type *DataTy = nullptr;
138
139 /// ID of the memory intrinsic.
140 Intrinsic::ID IID;
141
142 /// True when the memory access is predicated with a mask
143 /// that is not a compile-time constant.
144 bool VariableMask = true;
145
146 /// Address space of the pointer.
147 unsigned AddressSpace = 0;
148
149 /// Alignment of single element.
150 Align Alignment;
151
152public:
154 bool VariableMask, Align Alignment,
155 const Instruction *I = nullptr)
156 : I(I), Ptr(Ptr), DataTy(DataTy), IID(Id), VariableMask(VariableMask),
157 Alignment(Alignment) {}
158
160 unsigned AddressSpace = 0)
161 : DataTy(DataTy), IID(Id), AddressSpace(AddressSpace),
162 Alignment(Alignment) {}
163
/// Construct attributes for a memory intrinsic without a pointer operand.
/// Ptr and AddressSpace are not set here and keep their default member
/// initializers (nullptr and 0). \p I is the optional context instruction.
164 MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy, bool VariableMask,
165 Align Alignment, const Instruction *I = nullptr)
166 : I(I), DataTy(DataTy), IID(Id), VariableMask(VariableMask),
167 Alignment(Alignment) {}
168
/// Read-only accessors; each returns the corresponding member by value.
/// \returns the ID of the memory intrinsic.
169 Intrinsic::ID getID() const { return IID; }
/// \returns the optional context instruction; may be null.
170 const Instruction *getInst() const { return I; }
/// \returns the address in memory; null when the constructor used did not
/// take a pointer.
171 const Value *getPointer() const { return Ptr; }
/// \returns the vector type of the data to be loaded or stored.
172 Type *getDataType() const { return DataTy; }
/// \returns true when the access is predicated with a mask that is not a
/// compile-time constant. Defaults to true if the constructor did not set it.
173 bool getVariableMask() const { return VariableMask; }
/// \returns the address space of the pointer (0 unless set by a constructor).
174 unsigned getAddressSpace() const { return AddressSpace; }
/// \returns the alignment of a single element.
175 Align getAlignment() const { return Alignment; }
176};
177
179 const IntrinsicInst *II = nullptr;
180 Type *RetTy = nullptr;
181 Intrinsic::ID IID;
182 SmallVector<Type *, 4> ParamTys;
184 FastMathFlags FMF;
185 // If ScalarizationCost is invalid (the default), the cost of scalarizing the
186 // arguments and the return value will be computed based on types.
187 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
188
189public:
191 Intrinsic::ID Id, const CallBase &CI,
193 bool TypeBasedOnly = false);
194
196 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
197 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
199
202
206 const IntrinsicInst *I = nullptr,
208
/// Read-only accessors for the stored attributes.
209 Intrinsic::ID getID() const { return IID; }
/// \returns the associated intrinsic instruction; null by default.
210 const IntrinsicInst *getInst() const { return II; }
211 Type *getReturnType() const { return RetTy; }
212 FastMathFlags getFlags() const { return FMF; }
/// \returns the stored scalarization cost, which is
/// InstructionCost::getInvalid() unless one was supplied.
213 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
214 const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
215 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
216
/// \returns true when the Arguments list is empty.
/// NOTE(review): presumably this means only the parameter types are
/// available for costing - confirm against the constructors.
217 bool isTypeBasedOnly() const {
218 return Arguments.empty();
219 }
220
/// \returns true when ScalarizationCost holds a valid cost. Its default
/// initializer is InstructionCost::getInvalid().
221 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
222};
223
225 /// Don't use tail folding
227 /// Use predicate only to mask operations on data in the loop.
228 /// When the VL is not known to be a power-of-2, this method requires a
229 /// runtime overflow check for the i + VL in the loop because it compares the
230 /// scalar induction variable against the tripcount rounded up by VL which may
231 /// overflow. When the VL is a power-of-2, both the increment and uprounded
232 /// tripcount will overflow to 0, which does not require a runtime check
233 /// since the loop is exited when the loop induction variable equals the
234 /// uprounded trip-count, which are both 0.
236 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
237 /// calculate the mask and instead implements this with a
238 /// splat/stepvector/cmp.
239 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
240 /// active.lane.mask intrinsic when it is not natively supported?
242 /// Use predicate to control both data and control flow.
243 /// This method always requires a runtime overflow check for the i + VL
244 /// increment inside the loop, because it uses the result directly in the
245 /// active.lane.mask to calculate the mask for the next iteration. If the
246 /// increment overflows, the mask is no longer correct.
248 /// Use predicated EVL instructions for tail-folding.
249 /// Indicates that VP intrinsics should be used.
251};
252
261
262class TargetTransformInfo;
265
266/// This pass provides access to the codegen interfaces that are needed
267/// for IR-level transformations.
269public:
276
277 /// Get the kind of extension that an instruction represents.
280 /// Get the kind of extension that a cast opcode represents.
283 /// Get the cast opcode for an extension kind.
286
287 /// Construct a TTI object using a type implementing the \c Concept
288 /// API below.
289 ///
290 /// This is used by targets to construct a TTI wrapping their target-specific
291 /// implementation that encodes appropriate costs for their target.
293 std::unique_ptr<const TargetTransformInfoImplBase> Impl);
294
295 /// Construct a baseline TTI object using a minimal implementation of
296 /// the \c Concept API below.
297 ///
298 /// The TTI implementation will reflect the information in the DataLayout
299 /// provided.
300 LLVM_ABI explicit TargetTransformInfo(const DataLayout &DL);
301
302 // Provide move semantics.
305
306 // We need to define the destructor out-of-line to define our sub-classes
307 // out-of-line.
309
310 /// Handle the invalidation of this information.
311 ///
312 /// When used as a result of \c TargetIRAnalysis this method will be called
313 /// when the function this was computed for changes. When it returns false,
314 /// the information is preserved across those changes.
316 FunctionAnalysisManager::Invalidator &) {
317 // FIXME: We should probably in some way ensure that the subtarget
318 // information for a function hasn't changed.
319 return false;
320 }
321
322 /// \name Generic Target Information
323 /// @{
324
325 /// The kind of cost model.
326 ///
327 /// There are several different cost models that can be customized by the
328 /// target. The normalization of each cost model may be target specific.
329 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
330 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
332 TCK_RecipThroughput, ///< Reciprocal throughput.
333 TCK_Latency, ///< The latency of instruction.
334 TCK_CodeSize, ///< Instruction code size.
335 TCK_SizeAndLatency ///< The weighted sum of size and latency.
336 };
337
338 /// Underlying constants for 'cost' values in this interface.
339 ///
340 /// Many APIs in this interface return a cost. This enum defines the
341 /// fundamental values that should be used to interpret (and produce) those
342 /// costs. The costs are returned as an int rather than a member of this
343 /// enumeration because it is expected that the cost of one IR instruction
344 /// may have a multiplicative factor to it or otherwise won't fit directly
345 /// into the enum. Moreover, it is common to sum or average costs which works
346 /// better as simple integral values. Thus this enum only provides constants.
347 /// Also note that the returned costs are signed integers to make it natural
348 /// to add, subtract, and test with zero (a common boundary condition). It is
349 /// not expected that 2^32 is a realistic cost to be modeling at any point.
350 ///
351 /// Note that these costs should usually reflect the intersection of code-size
352 /// cost and execution cost. A free instruction is typically one that folds
353 /// into another instruction. For example, reg-to-reg moves can often be
354 /// skipped by renaming the registers in the CPU, but they still are encoded
355 /// and thus wouldn't be considered 'free' here.
357 TCC_Free = 0, ///< Expected to fold away in lowering.
358 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
359 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
360 };
361
362 /// Estimate the cost of a GEP operation when lowered.
363 ///
364 /// \p PointeeType is the source element type of the GEP.
365 /// \p Ptr is the base pointer operand.
366 /// \p Operands is the list of indices following the base pointer.
367 ///
368 /// \p AccessType is a hint as to what type of memory might be accessed by
369 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
370 /// folded into the addressing mode of a load/store. If AccessType is null,
371 /// then the resulting target type based off of PointeeType will be used as an
372 /// approximation.
374 getGEPCost(Type *PointeeType, const Value *Ptr,
375 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
376 TargetCostKind CostKind = TCK_SizeAndLatency) const;
377
378 /// Describe known properties for a set of pointers.
380 /// All the GEPs in a set have same base address.
381 unsigned IsSameBaseAddress : 1;
382 /// These properties are only valid if IsSameBaseAddress is set.
383 /// True if all pointers are separated by a unit stride.
384 unsigned IsUnitStride : 1;
385 /// True if distance between any two neighbouring pointers is a known value.
386 unsigned IsKnownStride : 1;
387 unsigned Reserved : 29;
388
/// \returns true if all the GEPs in the set have the same base address.
389 bool isSameBase() const { return IsSameBaseAddress; }
/// \returns true if the pointers are separated by a unit stride. The
/// same-base flag is checked as well because IsUnitStride is only
/// meaningful when IsSameBaseAddress is set.
390 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
392
394 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
395 /*IsKnownStride=*/1, 0};
396 }
398 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
399 /*IsKnownStride=*/1, 0};
400 }
402 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
403 /*IsKnownStride=*/0, 0};
404 }
405 };
406 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
407
408 /// Estimate the cost of a chain of pointers (typically pointer operands of a
409 /// chain of loads or stores within same block) operations set when lowered.
410 /// \p AccessTy is the type of the loads/stores that will ultimately use the
411 /// \p Ptrs.
414 const PointersChainInfo &Info, Type *AccessTy,
415 TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
416
417 /// \returns A value by which our inlining threshold should be multiplied.
418 /// This is primarily used to bump up the inlining threshold wholesale on
419 /// targets where calls are unusually expensive.
420 ///
421 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
422 /// individual classes of instructions would be better.
424
427
428 /// \returns The bonus of inlining the last call to a static function.
430
431 /// \returns A value to be added to the inlining threshold.
432 LLVM_ABI unsigned adjustInliningThreshold(const CallBase *CB) const;
433
434 /// \returns The cost of having an Alloca in the caller if not inlined, to be
435 /// added to the threshold
436 LLVM_ABI unsigned getCallerAllocaCost(const CallBase *CB,
437 const AllocaInst *AI) const;
438
439 /// \returns Vector bonus in percent.
440 ///
441 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
442 /// and apply this bonus based on the percentage of vector instructions. A
443 /// bonus is applied if the vector instructions exceed 50% and half that
444 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
445 /// arbitrary and evolved over time by accident as much as because they are
446 /// principled bonuses.
447 /// FIXME: It would be nice to base the bonus values on something more
448 /// scientific. A target may have no bonus on vector instructions.
450
451 /// \return the expected cost of a memcpy, which could e.g. depend on the
452 /// source/destination type and alignment and the number of bytes copied.
454
455 /// Returns the maximum memset / memcpy size in bytes that still makes it
456 /// profitable to inline the call.
458
459 /// \return The estimated number of case clusters when lowering \p 'SI'.
460 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
461 /// table.
462 LLVM_ABI unsigned
463 getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
465 BlockFrequencyInfo *BFI) const;
466
467 /// Estimate the cost of a given IR user when lowered.
468 ///
469 /// This can estimate the cost of either a ConstantExpr or Instruction when
470 /// lowered.
471 ///
472 /// \p Operands is a list of operands which can be a result of transformations
473 /// of the current operands. The number of the operands on the list must equal
474 /// the number of the current operands the IR user has. Their order on the
475 /// list must be the same as the order of the current operands the IR user
476 /// has.
477 ///
478 /// The returned cost is defined in terms of \c TargetCostConstants, see its
479 /// comments for a detailed explanation of the cost values.
482 TargetCostKind CostKind) const;
483
484 /// This is a helper function which calls the three-argument
485 /// getInstructionCost with \p Operands which are the current operands U has.
487 TargetCostKind CostKind) const {
488 SmallVector<const Value *, 4> Operands(U->operand_values());
489 return getInstructionCost(U, Operands, CostKind);
490 }
491
492 /// If a branch or a select condition is skewed in one direction by more than
493 /// this factor, it is very likely to be predicted correctly.
495
496 /// Returns estimated penalty of a branch misprediction in latency. Indicates
497 /// how aggressively the target wants unpredictable branches eliminated. A
498 /// zero return value means extra optimization applied to them should be
499 /// minimal.
501
502 /// Return true if branch divergence exists.
503 ///
504 /// Branch divergence has a significantly negative impact on GPU performance
505 /// when threads in the same wavefront take different paths due to conditional
506 /// branches.
507 ///
508 /// If \p F is passed, provides a context function. If \p F is known to only
509 /// execute in a single threaded environment, the target may choose to skip
510 /// uniformity analysis and assume all values are uniform.
511 LLVM_ABI bool hasBranchDivergence(const Function *F = nullptr) const;
512
513 /// Get target-specific uniformity information for a value.
514 /// This allows targets to provide more fine-grained control over
515 /// uniformity analysis by specifying whether specific values
516 /// should always or never be considered uniform, or require custom
517 /// operand-based analysis.
518 /// \param V The value to query for uniformity information.
519 /// \return ValueUniformity.
521
522 /// Query the target whether the specified address space cast from FromAS to
523 /// ToAS is valid.
524 LLVM_ABI bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
525
526 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
527 LLVM_ABI bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
528
529 /// Returns the address space ID for a target's 'flat' address space. Note
530 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
531 /// refers to as the generic address space. The flat address space is a
532 /// generic address space that can be used to access multiple segments of memory
533 /// with different address spaces. Access of a memory location through a
534 /// pointer with this address space is expected to be legal but slower
535 /// compared to the same memory location accessed through a pointer with a
536 /// different address space.
537 ///
538 /// This is for targets with different pointer representations which can
539 /// be converted with the addrspacecast instruction. If a pointer is converted
540 /// to this address space, optimizations should attempt to replace the access
541 /// with the source address space.
542 ///
543 /// \returns ~0u if the target does not have such a flat address space to
544 /// optimize away.
545 LLVM_ABI unsigned getFlatAddressSpace() const;
546
547 /// Return any intrinsic address operand indexes which may be rewritten if
548 /// they use a flat address space pointer.
549 ///
550 /// \returns true if the intrinsic was handled.
552 Intrinsic::ID IID) const;
553
/// NOTE(review): undocumented in this header. Presumably returns true when an
/// address space cast from \p FromAS to \p ToAS leaves the pointer value
/// unchanged - confirm against the target implementations.
554 LLVM_ABI bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
555
556 // Given an address space cast of the given pointer value, calculate the known
557 // bits of the source pointer in the source addrspace and the destination
558 // pointer in the destination addrspace.
559 LLVM_ABI std::pair<KnownBits, KnownBits>
560 computeKnownBitsAddrSpaceCast(unsigned ToAS, const Value &PtrOp) const;
561
562 // Given an address space cast, calculate the known bits of the resulting ptr
563 // in the destination addrspace using the known bits of the source pointer in
564 // the source addrspace.
566 unsigned FromAS, unsigned ToAS, const KnownBits &FromPtrBits) const;
567
568 /// Returns a mask indicating which bits of a pointer remain unchanged when
569 /// casting between address spaces. The returned APInt has the same bit width
570 /// as the source address space pointer size.
571 ///
572 /// Some targets allow certain bits of a pointer to change (e.g., the low
573 /// bits within a page) while still preserving the address space. This mask
574 /// identifies those bits that are guaranteed to be preserved. If the mask is
575 /// all zeros, no bits are preserved and address space inference cannot be
576 /// performed safely.
577 ///
578 /// For example, given:
579 /// %gp = addrspacecast ptr addrspace(2) %sp to ptr
580 /// %a = ptrtoint ptr %gp to i64
581 /// %b = xor i64 7, %a
582 /// %gp2 = inttoptr i64 %b to ptr
583 /// store i16 0, ptr %gp2, align 2
584 /// if the target preserves the upper bits, `%gp2` can be safely replaced
585 /// with `inttoptr i64 %b to ptr addrspace(2)`.
587 unsigned DstAS) const;
588
589 /// Return true if globals in this address space can have initializers other
590 /// than `undef`.
591 LLVM_ABI bool
593
/// NOTE(review): undocumented in this header. Presumably returns the address
/// space the target assumes for \p V; the value returned when nothing can be
/// assumed is not visible here - confirm against the implementation.
594 LLVM_ABI unsigned getAssumedAddrSpace(const Value *V) const;
595
/// NOTE(review): undocumented in this header. Presumably reports whether the
/// target is known to run code in a single-threaded environment - confirm
/// against the implementation.
596 LLVM_ABI bool isSingleThreaded() const;
597
/// NOTE(review): undocumented in this header. Returns a (value, address
/// space) pair for \p V; presumably the first element is a predicate value
/// under which \p V is in the returned address space - confirm against the
/// implementation, including what is returned when no predicate is found.
598 LLVM_ABI std::pair<const Value *, unsigned>
599 getPredicatedAddrSpace(const Value *V) const;
600
601 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
602 /// NewV, which has a different address space. This should happen for every
603 /// operand index that collectFlatAddressOperands returned for the intrinsic.
604 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
605 /// new value (which may be the original \p II with modified operands).
607 Value *OldV,
608 Value *NewV) const;
609
610 /// Test whether calls to a function lower to actual program function
611 /// calls.
612 ///
613 /// The idea is to test whether the program is likely to require a 'call'
614 /// instruction or equivalent in order to call the given function.
615 ///
616 /// FIXME: It's not clear that this is a good or useful query API. Clients
617 /// should probably move to simpler cost metrics using the above.
618 /// Alternatively, we could split the cost interface into distinct code-size
619 /// and execution-speed costs. This would allow modelling the core of this
620 /// query more accurately as a call is a single small instruction, but
621 /// incurs significant execution cost.
622 LLVM_ABI bool isLoweredToCall(const Function *F) const;
623
/// Components of a Loop Strength Reduction (LSR) cost. Two costs are compared
/// by the isLSRCostLess hook. No member has a default initializer, so every
/// field must be set explicitly before the struct is read.
624 struct LSRCost {
625 /// TODO: Some of these could be merged. Also, a lexical ordering
626 /// isn't always optimal.
627 unsigned Insns;
628 unsigned NumRegs;
629 unsigned AddRecCost;
630 unsigned NumIVMuls;
631 unsigned NumBaseAdds;
632 unsigned ImmCost;
633 unsigned SetupCost;
634 unsigned ScaleCost;
635 };
636
637 /// Parameters that control the generic loop unrolling transformation.
639 /// The cost threshold for the unrolled loop. Should be relative to the
640 /// getInstructionCost values returned by this API, and the expectation is
641 /// that the unrolled loop's instructions when run through that interface
642 /// should not exceed this cost. However, this is only an estimate. Also,
643 /// specific loops may be unrolled even with a cost above this threshold if
644 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
645 /// restriction.
646 unsigned Threshold;
647 /// If complete unrolling will reduce the cost of the loop, we will boost
648 /// the Threshold by a certain percent to allow more aggressive complete
649 /// unrolling. This value provides the maximum boost percentage that we
650 /// can apply to Threshold (The value should be no less than 100).
651 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
652 /// MaxPercentThresholdBoost / 100)
653 /// E.g. if complete unrolling reduces the loop execution time by 50%
654 /// then we boost the threshold by the factor of 2x. If unrolling is not
655 /// expected to reduce the running time, then we do not increase the
656 /// threshold.
658 /// The cost threshold for the unrolled loop when optimizing for size (set
659 /// to UINT_MAX to disable).
661 /// The cost threshold for the unrolled loop, like Threshold, but used
662 /// for partial/runtime unrolling (set to UINT_MAX to disable).
664 /// The cost threshold for the unrolled loop when optimizing for size, like
665 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
666 /// UINT_MAX to disable).
668 /// A forced unrolling factor (the number of concatenated bodies of the
669 /// original loop in the unrolled loop body). When set to 0, the unrolling
670 /// transformation will select an unrolling factor based on the current cost
671 /// threshold and other factors.
672 unsigned Count;
673 /// Default unroll count for loops with run-time trip count.
675 /// Set the maximum unrolling factor. The unrolling factor may be selected
676 /// using the appropriate cost threshold, but may not exceed this number
677 /// (set to UINT_MAX to disable). This does not apply in cases where the
678 /// loop is being fully unrolled.
679 unsigned MaxCount;
680 /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
681 /// to be overridden by a target gives more flexibility in certain cases.
682 /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
684 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
685 /// applies even if full unrolling is selected. This allows a target to fall
686 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
688 /// Represents the number of instructions optimized when "back edge"
689 /// becomes "fall through" in unrolled loop.
690 /// For now we count a conditional branch on a backedge and a comparison
691 /// feeding it.
692 unsigned BEInsns;
693 /// Allow partial unrolling (unrolling of loops to expand the size of the
694 /// loop body, not only to eliminate small constant-trip-count loops).
696 /// Allow runtime unrolling (unrolling of loops to expand the size of the
697 /// loop body even when the number of loop iterations is not known at
698 /// compile time).
700 /// Allow generation of a loop remainder (extra iterations after unroll).
702 /// Allow emitting expensive instructions (such as divisions) when computing
703 /// the trip count of a loop for runtime unrolling.
705 /// Apply loop unroll on any kind of loop
706 /// (mainly to loops that fail runtime unrolling).
707 bool Force;
708 /// Allow using trip count upper bound to unroll loops.
710 /// Allow unrolling of all the iterations of the runtime loop remainder.
712 /// Allow unroll and jam. Used to enable unroll and jam for the target.
714 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
715 /// value above is used during unroll and jam for the outer loop size.
716 /// This value is used in the same manner to limit the size of the inner
717 /// loop.
719 /// Don't allow loop unrolling to simulate more than this number of
720 /// iterations when checking full unroll profitability
722 /// Disable runtime unrolling by default for vectorized loops.
724 /// Don't allow runtime unrolling if expanding the trip count takes more
725 /// than SCEVExpansionBudget.
727 /// Allow runtime unrolling multi-exit loops. Should only be set if the
728 /// target determined that multi-exit unrolling is profitable for the loop.
729 /// Fall back to the generic logic to determine whether multi-exit unrolling
730 /// is profitable if set to false.
732 /// Allow unrolling to add parallel reduction phis.
734 };
735
736 /// Get target-customized preferences for the generic loop unrolling
737 /// transformation. The caller will initialize UP with the current
738 /// target-independent defaults.
741 OptimizationRemarkEmitter *ORE) const;
742
743 /// Query the target whether it would be profitable to convert the given loop
744 /// into a hardware loop.
746 AssumptionCache &AC,
747 TargetLibraryInfo *LibInfo,
748 HardwareLoopInfo &HWLoopInfo) const;
749
750 // Query the target for which minimum vectorization factor epilogue
751 // vectorization should be considered.
753
754 /// Query the target whether it would be preferred to create a tail-folded
755 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
757
758 /// Query the target what the preferred style of tail folding is.
760
761 // Parameters that control the loop peeling transformation
763 /// A forced peeling factor (the number of bodies of the original loop
764 /// that should be peeled off before the loop body). When set to 0, a
765 /// peeling factor is chosen based on profile information and other factors.
766 unsigned PeelCount;
767 /// Allow peeling off loop iterations.
769 /// Allow peeling off loop iterations for loop nests.
771 /// Allow peeling based on profile. Used to enable peeling off all
772 /// iterations based on the provided profile.
773 /// If the value is true the peeling cost model can decide to peel only
774 /// some iterations and in this case it will set this to false.
776
777 /// Peel off the last PeelCount loop iterations.
779 };
780
781 /// Get target-customized preferences for the generic loop peeling
782 /// transformation. The caller will initialize \p PP with the current
783 /// target-independent defaults with information from \p L and \p SE.
785 PeelingPreferences &PP) const;
786
787 /// Targets can implement their own combinations for target-specific
788 /// intrinsics. This function will be called from the InstCombine pass every
789 /// time a target-specific intrinsic is encountered.
790 ///
791 /// \returns std::nullopt to not do anything target specific or a value that
792 /// will be returned from the InstCombiner. Returning nullptr stops further
793 /// processing of the intrinsic.
794 LLVM_ABI std::optional<Instruction *>
796 /// Can be used to implement target-specific instruction combining.
797 /// \see instCombineIntrinsic
798 LLVM_ABI std::optional<Value *>
800 APInt DemandedMask, KnownBits &Known,
801 bool &KnownBitsComputed) const;
802 /// Can be used to implement target-specific instruction combining.
803 /// \see instCombineIntrinsic
804 LLVM_ABI std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
805 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
806 APInt &UndefElts2, APInt &UndefElts3,
807 std::function<void(Instruction *, unsigned, APInt, APInt &)>
808 SimplifyAndSetOp) const;
809 /// @}
810
811 /// \name Scalar Target Information
812 /// @{
813
814 /// Flags indicating the kind of support for population count.
815 ///
816 /// Compared to the SW implementation, HW support is supposed to
817 /// significantly boost the performance when the population is dense, and it
818 /// may or may not degrade performance if the population is sparse. A HW
819 /// support is considered as "Fast" if it can outperform, or is on a par
820 /// with, SW implementation when the population is sparse; otherwise, it is
821 /// considered as "Slow".
823
824 /// Return true if the specified immediate is legal add immediate, that
825 /// is the target has add instructions which can add a register with the
826 /// immediate without having to materialize the immediate into a register.
827 LLVM_ABI bool isLegalAddImmediate(int64_t Imm) const;
828
829 /// Return true if adding the specified scalable immediate is legal, that is
830 /// the target has add instructions which can add a register with the
831 /// immediate (multiplied by vscale) without having to materialize the
832 /// immediate into a register.
833 LLVM_ABI bool isLegalAddScalableImmediate(int64_t Imm) const;
834
835 /// Return true if the specified immediate is legal icmp immediate,
836 /// that is the target has icmp instructions which can compare a register
837 /// against the immediate without having to materialize the immediate into a
838 /// register.
839 LLVM_ABI bool isLegalICmpImmediate(int64_t Imm) const;
840
841 /// Return true if the addressing mode represented by AM is legal for
842 /// this target, for a load/store of the specified type.
843 /// The type may be VoidTy, in which case only return true if the addressing
844 /// mode is legal for a load/store of any legal type.
845 /// If target returns true in LSRWithInstrQueries(), I may be valid.
846 /// \param ScalableOffset represents a quantity of bytes multiplied by vscale,
847 /// an invariant value known only at runtime. Most targets should not accept
848 /// a scalable offset.
849 ///
850 /// TODO: Handle pre/postinc as well.
852 int64_t BaseOffset, bool HasBaseReg,
853 int64_t Scale, unsigned AddrSpace = 0,
854 Instruction *I = nullptr,
855 int64_t ScalableOffset = 0) const;
856
857 /// Return true if LSR cost of C1 is lower than C2.
859 const TargetTransformInfo::LSRCost &C2) const;
860
861 /// Return true if LSR major cost is number of registers. Targets which
862 /// implement their own isLSRCostLess and unset number of registers as major
863 /// cost should return false, otherwise return true.
865
866 /// Return true if LSR should drop a found solution if it's calculated to be
867 /// less profitable than the baseline.
869
870 /// \returns true if LSR should not optimize a chain that includes \p I.
872
873 /// Return true if the target can fuse a compare and branch.
874 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
875 /// calculation for the instructions in a loop.
876 LLVM_ABI bool canMacroFuseCmp() const;
877
878 /// Return true if the target can save a compare for loop count, for example
879 /// hardware loop saves a compare.
882 TargetLibraryInfo *LibInfo) const;
883
884 /// Which addressing mode Loop Strength Reduction will try to generate.
886 AMK_None = 0x0, ///< Don't prefer any addressing mode
887 AMK_PreIndexed = 0x1, ///< Prefer pre-indexed addressing mode
888 AMK_PostIndexed = 0x2, ///< Prefer post-indexed addressing mode
889 AMK_All = 0x3, ///< Consider all addressing modes
890 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/AMK_All)
891 };
892
893 /// Return the preferred addressing mode LSR should make efforts to generate.
896
897 /// Some targets only support masked load/store with a constant mask.
902
903 /// Return true if the target supports masked store.
904 LLVM_ABI bool
905 isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace,
907 /// Return true if the target supports masked load.
908 LLVM_ABI bool
909 isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace,
911
912 /// Return true if the target supports nontemporal store.
913 LLVM_ABI bool isLegalNTStore(Type *DataType, Align Alignment) const;
914 /// Return true if the target supports nontemporal load.
915 LLVM_ABI bool isLegalNTLoad(Type *DataType, Align Alignment) const;
916
917 /// \returns true if the target supports broadcasting a load to a vector of
918 /// type <NumElements x ElementTy>.
919 LLVM_ABI bool isLegalBroadcastLoad(Type *ElementTy,
920 ElementCount NumElements) const;
921
922 /// Return true if the target supports masked scatter.
923 LLVM_ABI bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
924 /// Return true if the target supports masked gather.
925 LLVM_ABI bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
926 /// Return true if the target forces scalarizing of llvm.masked.gather
927 /// intrinsics.
929 Align Alignment) const;
930 /// Return true if the target forces scalarizing of llvm.masked.scatter
931 /// intrinsics.
933 Align Alignment) const;
934
935 /// Return true if the target supports masked compress store.
937 Align Alignment) const;
938 /// Return true if the target supports masked expand load.
939 LLVM_ABI bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
940
941 /// Return true if the target supports strided load.
942 LLVM_ABI bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
943
944 /// Return true if the target supports interleaved access for the given vector
945 /// type \p VTy, interleave factor \p Factor, alignment \p Alignment and
946 /// address space \p AddrSpace.
947 LLVM_ABI bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
948 Align Alignment,
949 unsigned AddrSpace) const;
950
951 /// Return true if the target supports masked vector histograms.
953 Type *DataType) const;
954
955 /// Return true if this is an alternating opcode pattern that can be lowered
956 /// to a single instruction on the target. In X86 this is for the addsub
957 /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
958 /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
959 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
960 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
961 /// \p VecTy is the vector type of the instruction to be generated.
962 LLVM_ABI bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
963 unsigned Opcode1,
964 const SmallBitVector &OpcodeMask) const;
965
966 /// Return true if we should be enabling ordered reductions for the target.
968
969 /// Return true if the target has a unified operation to calculate division
970 /// and remainder. If so, the additional implicit multiplication and
971 /// subtraction required to calculate a remainder from division are free. This
972 /// can enable more aggressive transformations for division and remainder than
973 /// would typically be allowed using throughput or size cost models.
974 LLVM_ABI bool hasDivRemOp(Type *DataType, bool IsSigned) const;
975
976 /// Return true if the given instruction (assumed to be a memory access
977 /// instruction) has a volatile variant. If that's the case then we can avoid
978 /// addrspacecast to generic AS for volatile loads/stores. Default
979 /// implementation returns false, which prevents address space inference for
980 /// volatile loads/stores.
981 LLVM_ABI bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
982
983 /// Return true if target doesn't mind addresses in vectors.
985
986 /// Return the cost of the scaling factor used in the addressing
987 /// mode represented by AM for this target, for a load/store
988 /// of the specified type.
989 /// If the AM is supported, the return value must be >= 0.
990 /// If the AM is not supported, it returns a negative value.
991 /// TODO: Handle pre/postinc as well.
993 StackOffset BaseOffset,
994 bool HasBaseReg, int64_t Scale,
995 unsigned AddrSpace = 0) const;
996
997 /// Return true if the loop strength reduce pass should make
998 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
999 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
1000 /// immediate offset and no index register.
1001 LLVM_ABI bool LSRWithInstrQueries() const;
1002
1003 /// Return true if it's free to truncate a value of type Ty1 to type
1004 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
1005 /// by referencing its sub-register AX.
1006 LLVM_ABI bool isTruncateFree(Type *Ty1, Type *Ty2) const;
1007
1008 /// Return true if it is profitable to hoist instruction in the
1009 /// then/else to before if.
1011
/// NOTE(review): this hook is undocumented. Presumably it reports whether
/// alias analysis should be used for this target -- confirm against the
/// implementation before relying on this description.
1012 LLVM_ABI bool useAA() const;
1013
1014 /// Return true if this type is legal.
1015 LLVM_ABI bool isTypeLegal(Type *Ty) const;
1016
1017 /// Returns the estimated number of registers required to represent \p Ty.
1018 LLVM_ABI unsigned getRegUsageForType(Type *Ty) const;
1019
1020 /// Return true if switches should be turned into lookup tables for the
1021 /// target.
1022 LLVM_ABI bool shouldBuildLookupTables() const;
1023
1024 /// Return true if switches should be turned into lookup tables
1025 /// containing this constant value for the target.
1027
1028 /// Return true if lookup tables should be turned into relative lookup tables.
1030
1031 /// Return true if the input function which is cold at all call sites,
1032 /// should use coldcc calling convention.
1034
1035 /// Return true if the input function is internal, should use fastcc calling
1036 /// convention.
1038
1039 /// Identifies if the vector form of the intrinsic has a scalar operand.
1041 unsigned ScalarOpdIdx) const;
1042
1043 /// Identifies if the vector form of the intrinsic is overloaded on the type
1044 /// of the operand at index \p OpdIdx, or on the return type if \p OpdIdx is
1045 /// -1.
1047 int OpdIdx) const;
1048
1049 /// Identifies if the vector form of the intrinsic that returns a struct is
1050 /// overloaded at the struct element index \p RetIdx.
1051 LLVM_ABI bool
1053 int RetIdx) const;
1054
1055 /// Represents a hint about the context in which an insert/extract is used.
1056 ///
1057 /// On some targets, inserts/extracts can cheaply be folded into loads/stores.
1058 ///
1059 /// This enum allows the vectorizer to give getVectorInstrCost an idea of how
1060 /// inserts/extracts are used
1061 ///
1062 /// See \c getVectorInstrContextHint to compute a VectorInstrContext from an
1063 /// insert/extract Instruction*.
1065 None, ///< The insert/extract is not used with a load/store.
1066 Load, ///< The value being inserted comes from a load (InsertElement only).
1067 Store, ///< The extracted value is stored (ExtractElement only).
1068 };
1069
1070 /// Calculates a VectorInstrContext from \p I.
1073
1074 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
1075 /// are set if the demanded result elements need to be inserted and/or
1076 /// extracted from vectors. The involved values may be passed in VL if
1077 /// Insert is true.
1079 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
1080 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
1081 ArrayRef<Value *> VL = {},
1083
1084 /// Estimate the overhead of scalarizing operands with the given types. The
1085 /// (potentially vector) types to use for each argument are passed via Tys.
1089
1090 /// If target has efficient vector element load/store instructions, it can
1091 /// return true here so that insertion/extraction costs are not added to
1092 /// the scalarization cost of a load/store.
1094
1095 /// If the target supports tail calls.
1096 LLVM_ABI bool supportsTailCalls() const;
1097
1098 /// If target supports tail call on \p CB
1099 LLVM_ABI bool supportsTailCallFor(const CallBase *CB) const;
1100
1101 /// Don't restrict interleaved unrolling to small loops.
1102 LLVM_ABI bool enableAggressiveInterleaving(bool LoopHasReductions) const;
1103
1104 /// Returns options for expansion of memcmp. IsZeroCmp is
1105 /// true if this is the expansion of memcmp(p1, p2, s) == 0.
1107 // Return true if memcmp expansion is enabled, i.e. MaxNumLoads is non-zero.
1108 operator bool() const { return MaxNumLoads > 0; }
1109
1110 // Maximum number of load operations.
1111 unsigned MaxNumLoads = 0;
1112
1113 // The list of available load sizes (in bytes), sorted in decreasing order.
1115
1116 // For memcmp expansion when the memcmp result is only compared equal or
1117 // not-equal to 0, allow up to this number of load pairs per block. As an
1118 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
1119 // a0 = load2bytes &a[0]
1120 // b0 = load2bytes &b[0]
1121 // a2 = load1byte &a[2]
1122 // b2 = load1byte &b[2]
1123 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
1124 unsigned NumLoadsPerBlock = 1;
1125
1126 // Set to true to allow overlapping loads. For example, 7-byte compares can
1127 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
1128 // requires all loads in LoadSizes to be doable in an unaligned way.
1130
1131 // Sometimes, the amount of data that needs to be compared is smaller than
1132 // the standard register size, but it cannot be loaded with just one load
1133 // instruction. For example, if the size of the memory comparison is 6
1134 // bytes, we can handle it more efficiently by loading all 6 bytes in a
1135 // single block and generating an 8-byte number, instead of generating two
1136 // separate blocks with conditional jumps for 4 and 2 byte loads. This
1137 // approach simplifies the process and produces the comparison result as
1138 // normal. This array lists the allowed sizes of memcmp tails that can be
1139 // merged into one block
1141 };
1143 bool IsZeroCmp) const;
1144
1145 /// Should the Select Optimization pass be enabled and run.
1146 LLVM_ABI bool enableSelectOptimize() const;
1147
1148 /// Should the Select Optimization pass treat the given instruction like a
1149 /// select, potentially converting it to a conditional branch. This can
1150 /// include select-like instructions like or(zext(c), x) that can be converted
1151 /// to selects.
1153
1154 /// Enable matching of interleaved access groups.
1156
1157 /// Enable matching of interleaved access groups that contain predicated
1158 /// accesses or gaps and therefore vectorized using masked
1159 /// vector loads/stores.
1161
1162 /// Indicate that it is potentially unsafe to automatically vectorize
1163 /// floating-point operations because the semantics of vector and scalar
1164 /// floating-point math may differ. For example, ARM NEON v7 SIMD math
1165 /// does not support IEEE-754 denormal numbers, while depending on the
1166 /// platform, scalar floating-point math does.
1167 /// This applies to floating-point math operations and calls, not memory
1168 /// operations, shuffles, or casts.
1170
1171 /// Determine if the target supports unaligned memory accesses.
1173 unsigned BitWidth,
1174 unsigned AddressSpace = 0,
1175 Align Alignment = Align(1),
1176 unsigned *Fast = nullptr) const;
1177
1178 /// Return hardware support for population count.
1179 LLVM_ABI PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
1180
1181 /// Return true if the hardware has a fast square-root instruction.
1182 LLVM_ABI bool haveFastSqrt(Type *Ty) const;
1183
1184 /// Return true if the cost of the instruction is too high to speculatively
1185 /// execute and should be kept behind a branch.
1186 /// This normally just wraps around a getInstructionCost() call, but some
1187 /// targets might report a low TCK_SizeAndLatency value that is incompatible
1188 /// with the fixed TCC_Expensive value.
1189 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
1191
1192 /// Return true if it is faster to check if a floating-point value is NaN
1193 /// (or not-NaN) versus a comparison against a constant FP zero value.
1194 /// Targets should override this if materializing a 0.0 for comparison is
1195 /// generally as cheap as checking for ordered/unordered.
1197
1198 /// Return the expected cost of supporting the floating point operation
1199 /// of the specified type.
1201
1202 /// Return the expected cost of materialization for the given integer
1203 /// immediate of the specified type.
1205 TargetCostKind CostKind) const;
1206
1207 /// Return the expected cost of materialization for the given integer
1208 /// immediate of the specified type for a given instruction. The cost can be
1209 /// zero if the immediate can be folded into the specified instruction.
1210 LLVM_ABI InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1211 const APInt &Imm, Type *Ty,
1213 Instruction *Inst = nullptr) const;
1215 const APInt &Imm, Type *Ty,
1216 TargetCostKind CostKind) const;
1217
1218 /// Return the expected cost for the given integer when optimising
1219 /// for size. This is different than the other integer immediate cost
1220 /// functions in that it is subtarget agnostic. This is useful when you e.g.
1221 /// target one ISA such as AArch32 but smaller encodings could be possible
1222 /// with another such as Thumb. This return value is used as a penalty when
1223 /// the total costs for a constant is calculated (the bigger the cost, the
1224 /// more beneficial constant hoisting is).
1225 LLVM_ABI InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1226 const APInt &Imm,
1227 Type *Ty) const;
1228
1229 /// It can be advantageous to detach complex constants from their uses to make
1230 /// their generation cheaper. This hook allows targets to report when such
1231 /// transformations might negatively affect the code generation of the
1232 /// underlying operation. The motivating example is divides whereby hoisting
1233 /// constants prevents the code generator's ability to transform them into
1234 /// combinations of simpler operations.
1236 const Function &Fn) const;
1237
1238 /// @}
1239
1240 /// \name Vector Target Information
1241 /// @{
1242
1243 /// The various kinds of shuffle patterns for vector queries.
1245 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1246 SK_Reverse, ///< Reverse the order of the vector.
1247 SK_Select, ///< Selects elements from the corresponding lane of
1248 ///< either source operand. This is equivalent to a
1249 ///< vector select with a constant condition operand.
1250 SK_Transpose, ///< Transpose two vectors.
1251 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1252 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1253 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1254 ///< with any shuffle mask.
1255 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1256 ///< shuffle mask.
1257 SK_Splice ///< Concatenates elements from the first input vector
1258 ///< with elements of the second input vector. Returning
1259 ///< a vector of the same type as the input vectors.
1260 ///< Index indicates start offset in first input vector.
1261 };
1262
1263 /// Additional information about an operand's possible values.
1265 OK_AnyValue, // Operand can have any value.
1266 OK_UniformValue, // Operand is uniform (splat of a value).
1267 OK_UniformConstantValue, // Operand is uniform constant.
1268 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1269 };
1270
1271 /// Additional properties of an operand's values.
1277
1278 // Describe the values an operand can take. We're in the process
1279 // of migrating uses of OperandValueKind and OperandValueProperties
1280 // to use this class, and then will change the internal representation.
1284
1285 bool isConstant() const {
1287 }
1288 bool isUniform() const {
1290 }
/// \returns true if this operand's Properties is exactly OP_PowerOf2.
1291 bool isPowerOf2() const {
1292 return Properties == OP_PowerOf2;
1293 }
1294 bool isNegatedPowerOf2() const {
1296 }
1297
1299 return {Kind, OP_None};
1300 }
1301
1303 OperandValueKind MergeKind = OK_AnyValue;
1304 if (isConstant() && OpInfoY.isConstant())
1305 MergeKind = OK_NonUniformConstantValue;
1306
1307 OperandValueProperties MergeProp = OP_None;
1308 if (Properties == OpInfoY.Properties)
1309 MergeProp = Properties;
1310 return {MergeKind, MergeProp};
1311 }
1312 };
1313
1314 /// \return the number of registers in the target-provided register class.
1315 LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const;
1316
1317 /// \return true if the target supports load/store that enables fault
1318 /// suppression of memory operands when the source condition is false.
1319 LLVM_ABI bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const;
1320
1321 /// \return the target-provided register class ID for the provided type,
1322 /// accounting for type promotion and other type-legalization techniques that
1323 /// the target might apply. However, it specifically does not account for the
1324 /// scalarization or splitting of vector types. Should a vector type require
1325 /// scalarization or splitting into multiple underlying vector registers, that
1326 /// type should be mapped to a register class containing no registers.
1327 /// Specifically, this is designed to provide a simple, high-level view of the
1328 /// register allocation later performed by the backend. These register classes
1329 /// don't necessarily map onto the register classes used by the backend.
1330 /// FIXME: It's not currently possible to determine how many registers
1331 /// are used by the provided type.
1333 Type *Ty = nullptr) const;
1334
1335 /// \return the target-provided register class name
1336 LLVM_ABI const char *getRegisterClassName(unsigned ClassID) const;
1337
1338 /// \return the cost of spilling a register in the target-provided register
1339 /// class to the stack.
1341 getRegisterClassSpillCost(unsigned ClassID, TargetCostKind CostKind) const;
1342
1343 /// \return the cost of reloading a register in the target-provided register
1344 /// class from the stack.
1346 getRegisterClassReloadCost(unsigned ClassID, TargetCostKind CostKind) const;
1347
1349
1350 /// \return The width of the largest scalar or vector register type.
1351 LLVM_ABI TypeSize getRegisterBitWidth(RegisterKind K) const;
1352
1353 /// \return The width of the smallest vector register type.
1354 LLVM_ABI unsigned getMinVectorRegisterBitWidth() const;
1355
1356 /// \return The maximum value of vscale if the target specifies an
1357 /// architectural maximum vector length, and std::nullopt otherwise.
1358 LLVM_ABI std::optional<unsigned> getMaxVScale() const;
1359
1360 /// \return the value of vscale to tune the cost model for.
1361 LLVM_ABI std::optional<unsigned> getVScaleForTuning() const;
1362
1363 /// \return True if the vectorization factor should be chosen to
1364 /// make the vector of the smallest element type match the size of a
1365 /// vector register. For wider element types, this could result in
1366 /// creating vectors that span multiple vector registers.
1367 /// If false, the vectorization factor will be chosen based on the
1368 /// size of the widest element type.
1369 /// \p K Register Kind for vectorization.
1370 LLVM_ABI bool
1372
1373 /// \return The minimum vectorization factor for types of given element
1374 /// bit width, or 0 if there is no minimum VF. The returned value only
1375 /// applies when shouldMaximizeVectorBandwidth returns true.
1376 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1377 LLVM_ABI ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1378
1379 /// \return The maximum vectorization factor for types of given element
1380 /// bit width and opcode, or 0 if there is no maximum VF.
1381 /// Currently only used by the SLP vectorizer.
1382 LLVM_ABI unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1383
1384 /// \return The minimum vectorization factor for the store instruction. Given
1385 /// the initial estimation of the minimum vector factor and store value type,
1386 /// it tries to find possible lowest VF, which still might be profitable for
1387 /// the vectorization.
1388 /// \param VF Initial estimation of the minimum vector factor.
1389 /// \param ScalarMemTy Scalar memory type of the store operation.
1390 /// \param ScalarValTy Scalar type of the stored value.
1391 /// \param Alignment Alignment of the store
1392 /// \param AddrSpace Address space of the store
1393 /// Currently only used by the SLP vectorizer.
1394 LLVM_ABI unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1395 Type *ScalarValTy, Align Alignment,
1396 unsigned AddrSpace) const;
1397
1398 /// \return True if it should be considered for address type promotion.
1399 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1400 /// profitable without finding other extensions fed by the same input.
1402 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1403
1404 /// \return The size of a cache line in bytes.
1405 LLVM_ABI unsigned getCacheLineSize() const;
1406
1407 /// The possible cache levels, as accepted by getCacheSize() and
/// getCacheAssociativity().
1408 enum class CacheLevel {
1409 L1D, ///< The L1 data cache
1410 L2D, ///< The L2 data cache
1411
1412 // We currently do not model L3 caches, as their sizes differ widely between
1413 // microarchitectures. Also, we do not yet have a use for L3 cache size
1414 // modeling.
1415 };
1416
1417 /// \return The size of the cache level in bytes, if available.
1418 LLVM_ABI std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1419
1420 /// \return The associativity of the cache level, if available.
1421 LLVM_ABI std::optional<unsigned>
1422 getCacheAssociativity(CacheLevel Level) const;
1423
1424 /// \return The minimum architectural page size for the target.
1425 LLVM_ABI std::optional<unsigned> getMinPageSize() const;
1426
1427 /// \return How much before a load we should place the prefetch
1428 /// instruction. This is currently measured in number of
1429 /// instructions.
1430 LLVM_ABI unsigned getPrefetchDistance() const;
1431
1432 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1433 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1434 /// and the arguments provided are meant to serve as a basis for deciding this
1435 /// for a particular loop.
1436 ///
1437 /// \param NumMemAccesses Number of memory accesses in the loop.
1438 /// \param NumStridedMemAccesses Number of the memory accesses that
1439 /// ScalarEvolution could find a known stride
1440 /// for.
1441 /// \param NumPrefetches Number of software prefetches that will be
1442 /// emitted as determined by the addresses
1443 /// involved and the cache line size.
1444 /// \param HasCall True if the loop contains a call.
1445 ///
1446 /// \return This is the minimum stride in bytes where it makes sense to start
1447 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1448 /// stride.
1449 LLVM_ABI unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1450 unsigned NumStridedMemAccesses,
1451 unsigned NumPrefetches,
1452 bool HasCall) const;
1453
1454 /// \return The maximum number of iterations to prefetch ahead. If
1455 /// the required number of iterations is more than this number, no
1456 /// prefetching is performed.
1457 LLVM_ABI unsigned getMaxPrefetchIterationsAhead() const;
1458
1459 /// \return True if prefetching should also be done for writes.
1460 LLVM_ABI bool enableWritePrefetching() const;
1461
1462 /// \return True if the target wants to issue a prefetch in address space \p AS.
1463 LLVM_ABI bool shouldPrefetchAddressSpace(unsigned AS) const;
1464
1465 /// \return The cost of a partial reduction, which is a reduction from a
1466 /// vector to another vector with fewer elements of larger size. They are
1467 /// represented by the llvm.vector.partial.reduce.add and
1468 /// llvm.vector.partial.reduce.fadd intrinsics, which take an accumulator of
1469 /// type \p AccumType and a second vector operand to be accumulated, whose
1470 /// element count is specified by \p VF. The type of reduction is specified by
1471 /// \p Opcode. The second operand passed to the intrinsic could be the result
1472 /// of an extend, such as sext or zext. In this case \p BinOp is nullopt,
1473 /// \p InputTypeA represents the type being extended and \p OpAExtend the
1474 /// operation, i.e. sign- or zero-extend.
1475 /// For floating-point partial reductions, any fast math flags (FMF) should be
1476 /// provided to govern which reductions are valid to perform (depending on
1477 /// reassoc or contract, for example), whereas this must be nullopt for
1478 /// integer partial reductions.
1479 /// Also, \p InputTypeB should be nullptr and OpBExtend should be None.
1480 /// Alternatively, the second operand could be the result of a binary
1481 /// operation performed on two extends, i.e.
1482 /// mul(zext i8 %a -> i32, zext i8 %b -> i32).
1483 /// In this case \p BinOp may specify the opcode of the binary operation,
1484 /// \p InputTypeA and \p InputTypeB the types being extended, and
1485 /// \p OpAExtend, \p OpBExtend the form of extensions. An example of an
1486 /// operation that uses a partial reduction is a dot product, which reduces
1487 /// two vectors in binary mul operation to another of 4 times fewer and 4
1488 /// times larger elements.
1490 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
1492 PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
1493 TTI::TargetCostKind CostKind, std::optional<FastMathFlags> FMF) const;
1494
1495 /// \return The maximum interleave factor that any transform should try to
1496 /// perform for this target. This number depends on the level of parallelism
1497 /// and the number of execution units in the CPU.
1498 LLVM_ABI unsigned getMaxInterleaveFactor(ElementCount VF) const;
1499
1500 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1501 LLVM_ABI static OperandValueInfo getOperandInfo(const Value *V);
1502
1503 /// Collect common data between two OperandValueInfo inputs
1504 LLVM_ABI static OperandValueInfo commonOperandInfo(const Value *X,
1505 const Value *Y);
1506
1507 /// This is an approximation of reciprocal throughput of a math/logic op.
1508 /// A higher cost indicates less expected throughput.
1509 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1510 /// clock cycles per instruction when the instructions are not part of a
1511 /// limiting dependency chain."
1512 /// Therefore, costs should be scaled to account for multiple execution units
1513 /// on the target that can process this type of instruction. For example, if
1514 /// there are 5 scalar integer units and 2 vector integer units that can
1515 /// calculate an 'add' in a single cycle, this model should indicate that the
1516 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1517 /// add instruction.
1518 /// \p Args is an optional argument which holds the instruction operands
1519 /// values so the TTI can analyze those values searching for special
1520 /// cases or optimizations based on those values.
1521 /// \p CxtI is the optional original context instruction, if one exists, to
1522 /// provide even more information.
1523 /// \p TLibInfo is used to search for platform specific vector library
1524 /// functions for instructions that might be converted to calls (e.g. frem).
1526 unsigned Opcode, Type *Ty,
1530 ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr,
1531 const TargetLibraryInfo *TLibInfo = nullptr) const;
1532
1533 /// Returns the cost estimation for alternating opcode pattern that can be
1534 /// lowered to a single instruction on the target. In X86 this is for the
1535 /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in
1536 /// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
1537 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1538 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
1539 /// \p VecTy is the vector type of the instruction to be generated.
1541 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1542 const SmallBitVector &OpcodeMask,
1544
1545 /// \return The cost of a shuffle instruction of kind Kind with inputs of type
1546 /// SrcTy, producing a vector of type DstTy. The exact mask may be passed as
1547 /// Mask, or else the array will be empty. The Index and SubTp parameters
1548 /// are used by the subvector insertions shuffle kinds to show the insert
1549 /// point and the type of the subvector being inserted. The operands of the
1550 /// shuffle can be passed through \p Args, which helps improve the cost
1551 /// estimation in some cases, like in broadcast loads.
1553 ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
1554 ArrayRef<int> Mask = {},
1556 VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = {},
1557 const Instruction *CxtI = nullptr) const;
1558
1559 /// Represents a hint about the context in which a cast is used.
1560 ///
1561 /// For zext/sext, the context of the cast is the operand, which must be a
1562 /// load of some kind. For trunc, the context of the cast is the single
1563 /// user of the instruction, which must be a store of some kind.
1564 ///
1565 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1566 /// type of cast it's dealing with, as not every cast is equal. For instance,
1567 /// the zext of a load may be free, but the zext of an interleaving load can
1568 /// be (very) expensive!
1569 ///
1570 /// See \c getCastContextHint to compute a CastContextHint from a cast
1571 /// Instruction*. Callers can use it if they don't need to override the
1572 /// context and just want it to be calculated from the instruction.
1573 ///
1574 /// FIXME: This handles the types of load/store that the vectorizer can
1575 /// produce, which are the cases where the context instruction is most
1576 /// likely to be incorrect. There are other situations where that can happen
1577 /// too, which might be handled here but in the long run a more general
1578 /// solution of costing multiple instructions at the same time may be better.
1580 None, ///< The cast is not used with a load/store of any kind.
1581 Normal, ///< The cast is used with a normal load/store.
1582 Masked, ///< The cast is used with a masked load/store.
1583 GatherScatter, ///< The cast is used with a gather/scatter.
1584 Interleave, ///< The cast is used with an interleaved load/store.
1585 Reversed, ///< The cast is used with a reversed load/store.
1586 };
1587
1588 /// Calculates a CastContextHint from \p I.
1589 /// This should be used by callers of getCastInstrCost if they wish to
1590 /// determine the context from some instruction.
1591 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1592 /// or if it's another type of cast.
1594
1595 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1596 /// zext, etc. If there is an existing instruction that holds Opcode, it
1597 /// may be passed in the 'I' parameter.
1599 unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH,
1601 const Instruction *I = nullptr) const;
1602
1603 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1604 /// Index = -1 to indicate that there is no information about the index value.
1606 getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1607 unsigned Index, TTI::TargetCostKind CostKind) const;
1608
1609 /// \return The expected cost of control-flow related instructions such as
1610 /// Phi, Ret, Br, Switch.
1613 const Instruction *I = nullptr) const;
1614
1615 /// \returns The expected cost of compare and select instructions. If there
1616 /// is an existing instruction that holds Opcode, it may be passed in the
1617 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1618 /// is using a compare with the specified predicate as condition. When vector
1619 /// types are passed, \p VecPred must be used for all lanes. For a
1620 /// comparison, the two operands are the natural values. For a select, the
1621 /// two operands are the *value* operands, not the condition operand.
1623 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
1625 OperandValueInfo Op1Info = {OK_AnyValue, OP_None},
1626 OperandValueInfo Op2Info = {OK_AnyValue, OP_None},
1627 const Instruction *I = nullptr) const;
1628
1629 /// \return The expected cost of vector Insert and Extract.
1630 /// Use -1 to indicate that there is no information on the index value.
1631 /// This is used when the instruction is not available; a typical use
1632 /// case is to provision the cost of vectorization/scalarization in
1633 /// vectorizer passes.
1635 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind,
1636 unsigned Index = -1, const Value *Op0 = nullptr,
1637 const Value *Op1 = nullptr,
1639
1640 /// \return The expected cost of vector Insert and Extract.
1641 /// Use -1 to indicate that there is no information on the index value.
1642 /// This is used when the instruction is not available; a typical use
1643 /// case is to provision the cost of vectorization/scalarization in
1644 /// vectorizer passes.
1645 /// \param ScalarUserAndIdx encodes the information about extracts from a
1646 /// vector with 'Scalar' being the value being extracted, 'User' being the user
1647 /// of the extract (nullptr if user is not known before vectorization) and
1648 /// 'Idx' being the extract lane.
1650 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
1651 Value *Scalar,
1652 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
1654
1655 /// \return The expected cost of vector Insert and Extract.
1656 /// This is used when instruction is available, and implementation
1657 /// asserts 'I' is not nullptr.
1658 ///
1659 /// A typical suitable use case is cost estimation when vector instruction
1660 /// exists (e.g., from basic blocks during transformation).
1662 const Instruction &I, Type *Val, TTI::TargetCostKind CostKind,
1663 unsigned Index = -1,
1665
1666 /// \return The expected cost of inserting or extracting a lane that is \p
1667 /// Index elements from the end of a vector, i.e. the mathematical expression
1668 /// for the lane is (VF - 1 - Index). This is required for scalable vectors
1669 /// where the exact lane index is unknown at compile time.
1671 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind,
1672 unsigned Index) const;
1673
1674 /// \return The expected cost of aggregate inserts and extracts. This is
1675 /// used when the instruction is not available; a typical use case is to
1676 /// provision the cost of vectorization/scalarization in vectorizer passes.
1678 unsigned Opcode, TTI::TargetCostKind CostKind) const;
1679
1680 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1681 /// \p ReplicationFactor times.
1682 ///
1683 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1684 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1686 Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
1688
1689 /// \return The cost of Load and Store instructions. The operand info
1690 /// \p OpdInfo should refer to the stored value for stores and the address
1691 /// for loads.
1693 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1696 const Instruction *I = nullptr) const;
1697
1698 /// \return The cost of the interleaved memory operation.
1699 /// \p Opcode is the memory operation code
1700 /// \p VecTy is the vector type of the interleaved access.
1701 /// \p Factor is the interleave factor
1702 /// \p Indices is the indices for interleaved load members (as interleaved
1703 /// load allows gaps)
1704 /// \p Alignment is the alignment of the memory operation
1705 /// \p AddressSpace is address space of the pointer.
1706 /// \p UseMaskForCond indicates if the memory access is predicated.
1707 /// \p UseMaskForGaps indicates if gaps should be masked.
1709 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1710 Align Alignment, unsigned AddressSpace,
1712 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1713
1714 /// \returns True if an ordered reduction is required, i.e. when \p FMF is
1715 /// provided and does not allow reassociation.
1716 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1717 return FMF && !(*FMF).allowReassoc();
1718 }
1719
1720 /// Calculate the cost of vector reduction intrinsics.
1721 ///
1722 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1723 /// value using the operation denoted by \p Opcode. The FastMathFlags
1724 /// parameter \p FMF indicates what type of reduction we are performing:
1725 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1726 /// involves successively splitting a vector into half and doing the
1727 /// operation on the pair of halves until you have a scalar value. For
1728 /// example:
1729 /// (v0, v1, v2, v3)
1730 /// ((v0+v2), (v1+v3), undef, undef)
1731 /// ((v0+v2+v1+v3), undef, undef, undef)
1732 /// This is the default behaviour for integer operations, whereas for
1733 /// floating point we only do this if \p FMF indicates that
1734 /// reassociation is allowed.
1735 /// 2. Ordered. For a vector with N elements this involves performing N
1736 /// operations in lane order, starting with an initial scalar value, i.e.
1737 /// result = InitVal + v0
1738 /// result = result + v1
1739 /// result = result + v2
1740 /// result = result + v3
1741 /// This is only the case for FP operations and when reassociation is not
1742 /// allowed.
1743 ///
1745 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1747
1751
1752 /// Calculate the cost of an extended reduction pattern, similar to
1753 /// getArithmeticReductionCost of an Add/Sub reduction with multiply and
1754 /// optional extensions. This is the cost of:
1755 /// * ResTy vecreduce.add/sub(mul(A, B)) or,
1756 /// * ResTy vecreduce.add/sub(mul(ext(Ty A), ext(Ty B))).
1758 bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty,
1760
1761 /// Calculate the cost of an extended reduction pattern, similar to
1762 /// getArithmeticReductionCost of a reduction with an extension.
1763 /// This is the cost of:
1764 /// ResTy vecreduce.opcode(ext(Ty A)).
1766 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1767 std::optional<FastMathFlags> FMF,
1769
1770 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1771 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1772 /// 3. scalar instruction which is to be vectorized.
1775
1776 /// \returns The cost of memory intrinsic instructions.
1777 /// Used when IntrinsicInst is not materialized.
1781
1782 /// \returns The cost of Call instructions.
1784 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1786
1787 /// \returns The number of pieces into which the provided type must be
1788 /// split during legalization. Zero is returned when the answer is unknown.
1789 LLVM_ABI unsigned getNumberOfParts(Type *Tp) const;
1790
1791 /// \returns The cost of the address computation. For most targets this can be
1792 /// merged into the instruction indexing mode. Some targets might want to
1793 /// distinguish between address computation for memory operations with vector
1794 /// pointer types and scalar pointer types. Such targets should override this
1795 /// function. \p SE holds the pointer for the scalar evolution object which
1796 /// was used in order to get the Ptr step value. \p Ptr holds the SCEV of the
1797 /// access pointer.
1799 getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr,
1801
1802 /// \returns The cost, if any, of keeping values of the given types alive
1803 /// over a callsite.
1804 ///
1805 /// Some types may require the use of register classes that do not have
1806 /// any callee-saved registers, so would require a spill and fill.
1809
1810 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1811 /// will contain additional information - whether the intrinsic may write
1812 /// or read to memory, volatility and the pointer. Info is undefined
1813 /// if false is returned.
1815 MemIntrinsicInfo &Info) const;
1816
1817 /// \returns The maximum element size, in bytes, for an element
1818 /// unordered-atomic memory intrinsic.
1820
1821 /// \returns A value which is the result of the given memory intrinsic. If \p
1822 /// CanCreate is true, new instructions may be created to extract the result
1823 /// from the given intrinsic memory operation. Returns nullptr if the target
1824 /// cannot create a result from the given intrinsic.
1825 LLVM_ABI Value *
1827 bool CanCreate = true) const;
1828
1829 /// \returns The type to use in a loop expansion of a memcpy call.
1831 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1832 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
1833 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1834
1835 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1836 /// \param RemainingBytes The number of bytes to copy.
1837 ///
1838 /// Calculates the operand types to use when copying \p RemainingBytes of
1839 /// memory, where source and destination alignments are \p SrcAlign and
1840 /// \p DestAlign respectively.
1842 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1843 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1844 Align SrcAlign, Align DestAlign,
1845 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1846
1847 /// \returns True if the two functions have compatible attributes for inlining
1848 /// purposes.
1849 LLVM_ABI bool areInlineCompatible(const Function *Caller,
1850 const Function *Callee) const;
1851
1852 /// Returns a penalty for invoking call \p Call in \p F.
1853 /// For example, if a function F calls a function G, which in turn calls
1854 /// function H, then getInlineCallPenalty(F, H()) would return the
1855 /// penalty of calling H from F, e.g. after inlining G into F.
1856 /// \p DefaultCallPenalty is passed to give a default penalty that
1857 /// the target can amend or override.
1858 LLVM_ABI unsigned getInlineCallPenalty(const Function *F,
1859 const CallBase &Call,
1860 unsigned DefaultCallPenalty) const;
1861
1862 /// \returns true if `Caller`'s `Attr` should be added to the new function
1863 /// created by outlining part of `Caller`.
1864 LLVM_ABI bool
1866 const Attribute &Attr) const;
1867
1868 /// \returns True if the caller and callee agree on how \p Types will be
1869 /// passed to or returned from the callee.
1871 /// \param Types List of types to check.
1872 LLVM_ABI bool areTypesABICompatible(const Function *Caller,
1873 const Function *Callee,
1874 ArrayRef<Type *> Types) const;
1875
1876 /// The type of load/store indexing.
1878 MIM_Unindexed, ///< No indexing.
1879 MIM_PreInc, ///< Pre-incrementing.
1880 MIM_PreDec, ///< Pre-decrementing.
1881 MIM_PostInc, ///< Post-incrementing.
1882 MIM_PostDec ///< Post-decrementing.
1883 };
1884
1885 /// \returns True if the specified indexed load for the given type is legal.
1886 LLVM_ABI bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1887
1888 /// \returns True if the specified indexed store for the given type is legal.
1889 LLVM_ABI bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1890
1891 /// \returns The bitwidth of the largest vector type that should be used to
1892 /// load/store in the given address space.
1893 LLVM_ABI unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1894
1895 /// \returns True if the load instruction is legal to vectorize.
1897
1898 /// \returns True if the store instruction is legal to vectorize.
1900
1901 /// \returns True if it is legal to vectorize the given load chain.
1902 LLVM_ABI bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1903 Align Alignment,
1904 unsigned AddrSpace) const;
1905
1906 /// \returns True if it is legal to vectorize the given store chain.
1907 LLVM_ABI bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1908 Align Alignment,
1909 unsigned AddrSpace) const;
1910
1911 /// \returns True if it is legal to vectorize the given reduction kind.
1913 ElementCount VF) const;
1914
1915 /// \returns True if the given type is supported for scalable vectors.
1917
1918 /// \returns The new vector factor value if the target doesn't support \p
1919 /// SizeInBytes loads or has a better vector factor.
1920 LLVM_ABI unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1921 unsigned ChainSizeInBytes,
1922 VectorType *VecTy) const;
1923
1924 /// \returns The new vector factor value if the target doesn't support \p
1925 /// SizeInBytes stores or has a better vector factor.
1926 LLVM_ABI unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1927 unsigned ChainSizeInBytes,
1928 VectorType *VecTy) const;
1929
1930 /// \returns True if the target prefers fixed width vectorization if the
1931 /// loop vectorizer's cost-model assigns an equal cost to the fixed and
1932 /// scalable version of the vectorized loop.
1933 /// \p IsEpilogue is true if the decision is for the epilogue loop.
1934 LLVM_ABI bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const;
1935
1936 /// \returns True if target prefers SLP vectorizer with alternate opcode
1937 /// vectorization, false - otherwise.
1939
1940 /// \returns True if the SLP vectorizer should apply the instruction-count
1941 /// check that rejects 2-element vector trees when the vector instruction
1942 /// count exceeds the scalar instruction count, false if the target opts out
1943 /// of this heuristic.
1944 LLVM_ABI bool preferSLPInstCountCheck() const;
1945
1946 /// \returns True if the target prefers reductions of \p Kind to be performed
1947 /// in the loop.
1948 LLVM_ABI bool preferInLoopReduction(RecurKind Kind, Type *Ty) const;
1949
1950 /// \returns True if the target prefers the reduction select to be kept in
1951 /// the loop when tail folding, i.e.
1952 /// loop:
1953 /// p = phi (0, s)
1954 /// a = add (p, x)
1955 /// s = select (mask, a, p)
1956 /// vecreduce.add(s)
1957 ///
1958 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1959 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1960 /// by the target, this can lead to cleaner code generation.
1962
1963 /// Return true if the loop vectorizer should consider vectorizing an
1964 /// otherwise scalar epilogue loop if the loop already has been vectorized
1965 /// processing \p Iters scalar iterations per vector iteration.
1967
1968 /// \returns True if the loop vectorizer should discard any VFs where the
1969 /// maximum register pressure exceeds getNumberOfRegisters.
1971
1972 /// \returns True if the target wants to expand the given reduction intrinsic
1973 /// into a shuffle sequence.
1975
1977
1978 /// \returns The shuffle sequence pattern used to expand the given reduction
1979 /// intrinsic.
1982
1983 /// \returns the size cost of rematerializing a GlobalValue address relative
1984 /// to a stack reload.
1985 LLVM_ABI unsigned getGISelRematGlobalCost() const;
1986
1987 /// \returns the lower bound of a trip count to decide on vectorization
1988 /// while tail-folding.
1990
1991 /// \returns True if the target supports scalable vectors.
1992 LLVM_ABI bool supportsScalableVectors() const;
1993
1994 /// \return true when scalable vectorization is preferred.
1996
1997 /// \name Vector Predication Information
1998 /// @{
1999 /// Whether the target supports the %evl parameter of VP intrinsic efficiently
2000 /// in hardware. (see LLVM Language Reference - "Vector Predication
2001 /// Intrinsics"). Use of %evl is discouraged when that is not the case.
2002 LLVM_ABI bool hasActiveVectorLength() const;
2003
2004 /// Return true if sinking I's operands to the same basic block as I is
2005 /// profitable, e.g. because the operands can be folded into a target
2006 /// instruction during instruction selection. After calling the function
2007 /// \p Ops contains the Uses to sink ordered by dominance (dominating users
2008 /// come first).
2011
2012 /// Return true if it's significantly cheaper to shift a vector by a uniform
2013 /// scalar than by an amount which will vary across each lane. On x86 before
2014 /// AVX2 for example, there is a "psllw" instruction for the former case, but
2015 /// no simple instruction for a general "a << b" operation on vectors.
2016 /// This should also apply to lowering for vector funnel shifts (rotates).
2018
2021 // keep the predicating parameter
2023 // where legal, discard the predicate parameter
2025 // transform into something else that is also predicating
2027 };
2028
2029 // How to transform the EVL parameter.
2030 // Legal: keep the EVL parameter as it is.
2031 // Discard: Ignore the EVL parameter where it is safe to do so.
2032 // Convert: Fold the EVL into the mask parameter.
2034
2035 // How to transform the operator.
2036 // Legal: The target supports this operator.
2037 // Convert: Convert this to a non-VP operation.
2038 // The 'Discard' strategy is invalid.
2040
2041 bool shouldDoNothing() const {
2042 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
2043 }
2046 };
2047
2048 /// \returns How the target needs this vector-predicated operation to be
2049 /// transformed.
2051 getVPLegalizationStrategy(const VPIntrinsic &PI) const;
2052 /// @}
2053
2054 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
2055 /// state.
2056 ///
2057 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
2058 /// node containing a jump table in a format suitable for the target, so it
2059 /// needs to know what format of jump table it can legally use.
2060 ///
2061 /// For non-Arm targets, this function isn't used. It defaults to returning
2062 /// false, but it shouldn't matter what it returns anyway.
2063 LLVM_ABI bool hasArmWideBranch(bool Thumb) const;
2064
2065 /// Returns a bitmask constructed from the target-features or fmv-features
2066 /// metadata of a function corresponding to its Arch Extensions.
2067 LLVM_ABI APInt getFeatureMask(const Function &F) const;
2068
2069 /// Returns a bitmask constructed from the target-features or fmv-features
2070 /// metadata of a function corresponding to its FMV priority.
2071 LLVM_ABI APInt getPriorityMask(const Function &F) const;
2072
2073 /// Returns true if this is an instance of a function with multiple versions.
2074 LLVM_ABI bool isMultiversionedFunction(const Function &F) const;
2075
2076 /// \return The maximum number of function arguments the target supports.
2077 LLVM_ABI unsigned getMaxNumArgs() const;
2078
2079 /// \return For an array of given Size, return alignment boundary to
2080 /// pad to. Default is no padding.
2081 LLVM_ABI unsigned getNumBytesToPadGlobalArray(unsigned Size,
2082 Type *ArrayType) const;
2083
2084 /// @}
2085
2086 /// Collect kernel launch bounds for \p F into \p LB.
2088 const Function &F,
2089 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
2090
2091 /// Returns true if GEP should not be used to index into vectors for this
2092 /// target.
2094
2095 /// Determine if an instruction with Custom uniformity can be proven uniform
2096 /// based on which operands are uniform.
2097 ///
2098 /// \param I The instruction to check.
2099 /// \param UniformArgs A bitvector indicating which operands are known to be
2100 /// uniform (bit N corresponds to operand N).
2101 /// \returns true if the instruction result can be proven uniform given the
2102 /// uniform operands, false otherwise.
2103 LLVM_ABI bool isUniform(const Instruction *I,
2104 const SmallBitVector &UniformArgs) const;
2105
2106private:
2107 std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
2108};
2109
2110/// Analysis pass providing the \c TargetTransformInfo.
2111///
2112/// The core idea of the TargetIRAnalysis is to expose an interface through
2113/// which LLVM targets can analyze and provide information about the middle
2114/// end's target-independent IR. This supports use cases such as target-aware
2115/// cost modeling of IR constructs.
2116///
2117/// This is a function analysis because much of the cost modeling for targets
2118/// is done in a subtarget specific way and LLVM supports compiling different
2119/// functions targeting different subtargets in order to support runtime
2120/// dispatch according to the observed subtarget.
2121class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2122public:
2124
2125 /// Default construct a target IR analysis.
2126 ///
2127 /// This will use the module's datalayout to construct a baseline
2128 /// conservative TTI result.
2130
2131 /// Construct an IR analysis pass around a target-provided callback.
2132 ///
2133 /// The callback will be called with a particular function for which the TTI
2134 /// is needed and must return a TTI object for that function.
2135 LLVM_ABI
2136 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2137
2138 // Value semantics. We spell out the constructors for MSVC.
2140 : TTICallback(Arg.TTICallback) {}
2142 : TTICallback(std::move(Arg.TTICallback)) {}
2144 TTICallback = RHS.TTICallback;
2145 return *this;
2146 }
2148 TTICallback = std::move(RHS.TTICallback);
2149 return *this;
2150 }
2151
2153
2154private:
2156 LLVM_ABI static AnalysisKey Key;
2157
2158 /// The callback used to produce a result.
2159 ///
2160 /// We use a completely opaque callback so that targets can provide whatever
2161 /// mechanism they desire for constructing the TTI for a given function.
2162 ///
2163 /// FIXME: Should we really use std::function? It's relatively inefficient.
2164 /// It might be possible to arrange for even stateful callbacks to outlive
2165 /// the analysis and thus use a function_ref which would be lighter weight.
2166 /// This may also be less error prone as the callback is likely to reference
2167 /// the external TargetMachine, and that reference needs to never dangle.
2168 std::function<Result(const Function &)> TTICallback;
2169
2170 /// Helper function used as the callback in the default constructor.
2171 static Result getDefaultTTI(const Function &F);
2172};
2173
2174/// Wrapper pass for TargetTransformInfo.
2175///
2176/// This pass can be constructed from a TTI object which it stores internally
2177/// and is queried by passes.
2179 TargetIRAnalysis TIRA;
2180 std::optional<TargetTransformInfo> TTI;
2181
2182 virtual void anchor();
2183
2184public:
2185 static char ID;
2186
2187 /// We must provide a default constructor for the pass but it should
2188 /// never be used.
2189 ///
2190 /// Use the constructor below or call one of the creation routines.
2192
2194
2196};
2197
2198/// Create an analysis pass wrapper around a TTI object.
2199///
2200/// This analysis pass just holds the TTI instance and makes it available to
2201/// clients.
2204
2205} // namespace llvm
2206
2207#endif
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
#define LLVM_ABI
Definition Compiler.h:213
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
TargetTransformInfo::VPLegalization VPLegalization
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Class for arbitrary precision integers.
Definition APInt.h:78
an instruction to allocate memory on the stack
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
Conditional Branch instruction.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition Pass.h:285
ImmutablePass(char &pid)
Definition Pass.h:287
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Drive the analysis of interleaved memory accesses in the loop.
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost=InstructionCost::getInvalid(), bool TypeBasedOnly=false)
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Information for memory intrinsic cost model.
MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy, Align Alignment, unsigned AddressSpace=0)
const Instruction * getInst() const
MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, const Instruction *I=nullptr)
MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy, bool VariableMask, Align Alignment, const Instruction *I=nullptr)
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
An instruction for storing to memory.
Multiway switch.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
LLVM_ABI TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
TargetTransformInfo & getTTI(const Function &F)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
LLVM_ABI Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const
LLVM_ABI bool isLegalToVectorizeLoad(LoadInst *LI) const
LLVM_ABI std::optional< unsigned > getVScaleForTuning() const
static LLVM_ABI CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
LLVM_ABI unsigned getMaxNumArgs() const
LLVM_ABI bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Return false if an AS0 address cannot possibly alias an AS1 address.
LLVM_ABI bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
LLVM_ABI bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
LLVM_ABI bool isLegalToVectorizeStore(StoreInst *SI) const
LLVM_ABI InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add/...
LLVM_ABI bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const
LLVM_ABI bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
LLVM_ABI bool isMultiversionedFunction(const Function &F) const
Returns true if this is an instance of a function with multiple versions.
LLVM_ABI bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
LLVM_ABI bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace, MaskKind MaskKind=VariableOrConstantMask) const
Return true if the target supports masked store.
LLVM_ABI bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
LLVM_ABI unsigned getAssumedAddrSpace(const Value *V) const
LLVM_ABI bool preferAlternateOpcodeVectorization() const
LLVM_ABI bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
LLVM_ABI bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
@ None
The insert/extract is not used with a load/store.
@ Load
The value being inserted comes from a load (InsertElement only).
@ Store
The extracted value is stored (ExtractElement only).
LLVM_ABI unsigned getPrefetchDistance() const
LLVM_ABI Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize=std::nullopt) const
LLVM_ABI bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
LLVM_ABI bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
LLVM_ABI bool preferEpilogueVectorization(ElementCount Iters) const
Return true if the loop vectorizer should consider vectorizing an otherwise scalar epilogue loop if t...
LLVM_ABI MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
LLVM_ABI void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
LLVM_ABI bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
LLVM_ABI TailFoldingStyle getPreferredTailFoldingStyle() const
Query the target what the preferred style of tail folding is.
LLVM_ABI bool supportsTailCallFor(const CallBase *CB) const
If target supports tail call on CB.
LLVM_ABI std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
LLVM_ABI bool isProfitableLSRChainElement(Instruction *I) const
LLVM_ABI TypeSize getRegisterBitWidth(RegisterKind K) const
MaskKind
Some targets only support masked load/store with a constant mask.
LLVM_ABI unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
LLVM_ABI InstructionCost getOperandsScalarizationOverhead(ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
Estimate the overhead of scalarizing operands with the given types.
LLVM_ABI bool hasActiveVectorLength() const
LLVM_ABI bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
LLVM_ABI bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const
LLVM_ABI bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
LLVM_ABI ValueUniformity getValueUniformity(const Value *V) const
Get target-specific uniformity information for a value.
static LLVM_ABI OperandValueInfo commonOperandInfo(const Value *X, const Value *Y)
Collect common data between two OperandValueInfo inputs.
LLVM_ABI InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI std::optional< unsigned > getMaxVScale() const
LLVM_ABI InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
LLVM_ABI bool allowVectorElementIndexingUsingGEP() const
Returns true if GEP should not be used to index into vectors for this target.
LLVM_ABI InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
LLVM_ABI bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const
Query the target whether it would be preferred to create a tail-folded vector loop,...
LLVM_ABI bool isSingleThreaded() const
LLVM_ABI std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
LLVM_ABI bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are t...
LLVM_ABI unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
LLVM_ABI InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
LLVM_ABI InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const
LLVM_ABI InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
LLVM_ABI std::pair< KnownBits, KnownBits > computeKnownBitsAddrSpaceCast(unsigned ToAS, const Value &PtrOp) const
LLVM_ABI bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
LLVM_ABI unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
LLVM_ABI VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
static LLVM_ABI PartialReductionExtendKind getPartialReductionExtendKind(Instruction *I)
Get the kind of extension that an instruction represents.
LLVM_ABI bool shouldConsiderVectorizationRegPressure() const
LLVM_ABI bool enableWritePrefetching() const
LLVM_ABI bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting...
LLVM_ABI bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
LLVM_ABI bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
LLVM_ABI InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType=nullptr, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
LLVM_ABI bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
LLVM_ABI bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const
Return true if the target supports interleaved access for the given vector type VTy,...
LLVM_ABI unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
LLVM_ABI bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
LLVM_ABI bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
LLVM_ABI std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
LLVM_ABI InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
LLVM_ABI unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
LLVM_ABI InstructionCost getRegisterClassReloadCost(unsigned ClassID, TargetCostKind CostKind) const
LLVM_ABI ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
LLVM_ABI unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
LLVM_ABI bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
LLVM_ABI PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
LLVM_ABI unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
LLVM_ABI bool isElementTypeLegalForScalableVector(Type *Ty) const
LLVM_ABI bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
LLVM_ABI unsigned getMaxPrefetchIterationsAhead() const
LLVM_ABI bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
LLVM_ABI ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
LLVM_ABI InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
LLVM_ABI bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
LLVM_ABI InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
LLVM_ABI bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
Return true if the target supports strided load.
LLVM_ABI TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
LLVM_ABI InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and run.
LLVM_ABI bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
OperandValueProperties
Additional properties of an operand's values.
LLVM_ABI int getInliningLastCallToStaticBonus() const
LLVM_ABI InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const PointersChainInfo &Info, Type *AccessTy, TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Estimate the cost of a chain of pointers (typically pointer operands of a chain of loads or stores wi...
LLVM_ABI bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
LLVM_ABI unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
LLVM_ABI unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy, Align Alignment, unsigned AddrSpace) const
LLVM_ABI bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
LLVM_ABI bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
LLVM_ABI bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
LLVM_ABI std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const
LLVM_ABI bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
LLVM_ABI bool isUniform(const Instruction *I, const SmallBitVector &UniformArgs) const
Determine if an instruction with Custom uniformity can be proven uniform based on which operands are ...
LLVM_ABI InstructionCost getMemcpyCost(const Instruction *I) const
LLVM_ABI unsigned adjustInliningThreshold(const CallBase *CB) const
LLVM_ABI bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
LLVM_ABI bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
LLVM_ABI unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
LLVM_ABI InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
LLVM_ABI Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
LLVM_ABI InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
LLVM_ABI bool canSaveCmp(Loop *L, CondBrInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
LLVM_ABI unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
LLVM_ABI bool shouldPrefetchAddressSpace(unsigned AS) const
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI unsigned getMinVectorRegisterBitWidth() const
LLVM_ABI InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const
LLVM_ABI bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
LLVM_ABI unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
LLVM_ABI bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
It can be advantageous to detach complex constants from their uses to make their generation cheaper.
LLVM_ABI bool hasArmWideBranch(bool Thumb) const
LLVM_ABI const char * getRegisterClassName(unsigned ClassID) const
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
LLVM_ABI APInt getPriorityMask(const Function &F) const
Returns a bitmask constructed from the target-features or fmv-features metadata of a function corresp...
LLVM_ABI BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
LLVM_ABI TargetTransformInfo(std::unique_ptr< const TargetTransformInfoImplBase > Impl)
Construct a TTI object using a type implementing the Concept API below.
LLVM_ABI bool preferInLoopReduction(RecurKind Kind, Type *Ty) const
LLVM_ABI unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
LLVM_ABI bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const
LLVM_ABI unsigned getCacheLineSize() const
LLVM_ABI bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
LLVM_ABI bool shouldCopyAttributeWhenOutliningFrom(const Function *Caller, const Attribute &Attr) const
LLVM_ABI APInt getAddrSpaceCastPreservedPtrMask(unsigned SrcAS, unsigned DstAS) const
Returns a mask indicating which bits of a pointer remain unchanged when casting between address space...
LLVM_ABI int getInlinerVectorBonusPercent() const
LLVM_ABI unsigned getEpilogueVectorizationMinVF() const
LLVM_ABI void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
Collect kernel launch bounds for F into LB.
PopcntSupportKind
Flags indicating the kind of support for population count.
LLVM_ABI bool preferPredicatedReductionSelect() const
LLVM_ABI InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
LLVM_ABI AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
LLVM_ABI bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
LLVM_ABI bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
LLVM_ABI unsigned getInliningThresholdMultiplier() const
LLVM_ABI InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const
LLVM_ABI bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
LLVM_ABI bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
LLVM_ABI bool supportsScalableVectors() const
LLVM_ABI bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
LLVM_ABI bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
LLVM_ABI std::optional< unsigned > getMinPageSize() const
LLVM_ABI bool preferSLPInstCountCheck() const
LLVM_ABI bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
LLVM_ABI InstructionCost getInsertExtractValueCost(unsigned Opcode, TTI::TargetCostKind CostKind) const
LLVM_ABI bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
LLVM_ABI std::optional< unsigned > getCacheSize(CacheLevel Level) const
LLVM_ABI std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
LLVM_ABI bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
LLVM_ABI bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
LLVM_ABI InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Returns the cost estimation for alternating opcode pattern that can be lowered to a single instructio...
TargetCostConstants
Underlying constants for 'cost' values in this interface.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
LLVM_ABI unsigned getMinTripCountTailFoldingThreshold() const
LLVM_ABI InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
LLVM_ABI unsigned getMaxInterleaveFactor(ElementCount VF) const
LLVM_ABI bool enableScalableVectorization() const
LLVM_ABI bool useFastCCForInternalCall(Function &F) const
Return true if the input function, which is internal, should use the fastcc calling convention.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
LLVM_ABI unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
LLVM_ABI bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
LLVM_ABI unsigned getGISelRematGlobalCost() const
LLVM_ABI unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
static LLVM_ABI Instruction::CastOps getOpcodeForPartialReductionExtendKind(PartialReductionExtendKind Kind)
Get the cast opcode for an extension kind.
MemIndexedMode
The type of load/store indexing.
LLVM_ABI bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace, MaskKind MaskKind=VariableOrConstantMask) const
Return true if the target supports masked load.
LLVM_ABI InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
LLVM_ABI bool areInlineCompatible(const Function *Caller, const Function *Callee) const
LLVM_ABI bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
LLVM_ABI InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
LLVM_ABI bool supportsTailCalls() const
If the target supports tail calls.
LLVM_ABI bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
LLVM_ABI bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
LLVM_ABI unsigned getNumberOfParts(Type *Tp) const
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
@ AMK_PostIndexed
Prefer post-indexed addressing mode.
@ AMK_All
Consider all addressing modes.
@ AMK_PreIndexed
Prefer pre-indexed addressing mode.
@ AMK_None
Don't prefer any addressing mode.
LLVM_ABI InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
LLVM_ABI bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
LLVM_ABI void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize=std::nullopt) const
LLVM_ABI bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
LLVM_ABI bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
static LLVM_ABI VectorInstrContext getVectorInstrContextHint(const Instruction *I)
Calculates a VectorInstrContext from I.
LLVM_ABI bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const
LLVM_ABI InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
Estimate the overhead of scalarizing an instruction.
LLVM_ABI uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
Returns the maximum memset / memcpy size in bytes that still makes it profitable to inline the call.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
LLVM_ABI APInt getFeatureMask(const Function &F) const
Returns a bitmask constructed from the target-features or fmv-features metadata of a function corresp...
LLVM_ABI void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
LLVM_ABI InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
LLVM_ABI InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
LLVM_ABI InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const
LLVM_ABI InstructionCost getRegisterClassSpillCost(unsigned ClassID, TargetCostKind CostKind) const
OperandValueKind
Additional information about an operand's possible values.
CacheLevel
The possible cache levels.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
CallInst * Call
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Length
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
AtomicOrdering
Atomic ordering for LLVM's memory model.
TargetTransformInfo TTI
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
LLVM_ABI ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
RecurKind
These are the kinds of recurrences that we support.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
Definition Uniformity.h:18
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A CRTP mix-in that provides informational APIs needed for analysis passes.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
Attributes of a target dependent hardware loop.
LLVM_ABI bool canAnalyze(LoopInfo &LI)
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
SmallVector< InterestingMemoryOperand, 1 > InterestingOperands
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
TargetLibraryInfo * TLI
LoopVectorizationLegality * LVL
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
OperandValueInfo mergeWith(const OperandValueInfo OpInfoY)
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling based on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Describe known properties for a set of pointers.
unsigned IsKnownStride
True if the distance between any two neighbouring pointers is a known value.
unsigned IsUnitStride
These properties are only valid if IsSameBaseAddress is set.
unsigned IsSameBaseAddress
All the GEPs in a set have same base address.
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
bool UnrollVectorizedLoop
Disable runtime unrolling by default for vectorized loops.
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool RuntimeUnrollMultiExit
Allow runtime unrolling multi-exit loops.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
bool AddAdditionalAccumulators
Allow unrolling to add parallel reduction phis.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)