LLVM 20.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the WebAssembly-specific TargetTransformInfo
11/// implementation.
12///
13//===----------------------------------------------------------------------===//
14
16using namespace llvm;
17
18#define DEBUG_TYPE "wasmtti"
19
21WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
22 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
24}
25
26unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
27 unsigned Result = BaseT::getNumberOfRegisters(ClassID);
28
29 // For SIMD, use at least 16 registers, as a rough guess.
30 bool Vector = (ClassID == 1);
31 if (Vector)
32 Result = std::max(Result, 16u);
33
34 return Result;
35}
36
39 switch (K) {
41 return TypeSize::getFixed(64);
43 return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
45 return TypeSize::getScalable(0);
46 }
47
48 llvm_unreachable("Unsupported register kind");
49}
50
52 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
55 const Instruction *CxtI) {
56
59 Opcode, Ty, CostKind, Op1Info, Op2Info);
60
61 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
62 switch (Opcode) {
63 case Instruction::LShr:
64 case Instruction::AShr:
65 case Instruction::Shl:
66 // SIMD128's shifts currently only accept a scalar shift count. For each
67 // element, we'll need to extract, op, insert. The following is a rough
68 // approximation.
69 if (!Op2Info.isUniform())
70 Cost =
71 cast<FixedVectorType>(VTy)->getNumElements() *
73 getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
75 break;
76 }
77 }
78 return Cost;
79}
80
84 unsigned Index, Value *Op0, Value *Op1) {
86 Opcode, Val, CostKind, Index, Op0, Op1);
87
88 // SIMD128's insert/extract currently only take constant indices.
89 if (Index == -1u)
91
92 return Cost;
93}
94
96 const IntrinsicInst *II) const {
97
98 switch (II->getIntrinsicID()) {
99 default:
100 break;
101 case Intrinsic::vector_reduce_fadd:
103 }
105}
106
109 OptimizationRemarkEmitter *ORE) const {
110 // Scan the loop: don't unroll loops with calls. This is a standard approach
111 // for most (all?) targets.
112 for (BasicBlock *BB : L->blocks())
113 for (Instruction &I : *BB)
114 if (isa<CallInst>(I) || isa<InvokeInst>(I))
115 if (const Function *F = cast<CallBase>(I).getCalledFunction())
116 if (isLoweredToCall(F))
117 return;
118
119 // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
120 // the various microarchitectures that use the BasicTTI implementation and
121 // has been selected through heuristics across multiple cores and runtimes.
122 UP.Partial = UP.Runtime = UP.UpperBound = true;
123 UP.PartialThreshold = 30;
124
125 // Avoid unrolling when optimizing for size.
126 UP.OptSizeThreshold = 0;
128
129 // Set number of instructions optimized when "back edge"
130 // becomes "fall through" to default value of 2.
131 UP.BEInsns = 2;
132}
133
135 return getST()->hasTailCall();
136}
137
139 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
140 using namespace llvm::PatternMatch;
141
142 if (!I->getType()->isVectorTy() || !I->isShift())
143 return false;
144
145 Value *V = I->getOperand(1);
146 // We don't need to sink a constant splat.
147 if (dyn_cast<Constant>(V))
148 return false;
149
151 m_Value(), m_ZeroMask()))) {
152 // Sink insert
153 Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
154 // Sink shuffle
155 Ops.push_back(&I->getOperandUse(1));
156 return true;
157 }
158
159 return false;
160}
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines a TargetTransformInfo::Concept conforming object specific to the WebAssembly target machine...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:932
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
The optimization diagnostic interface.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
bool isLoweredToCall(const Function *F) const
unsigned getNumberOfRegisters(unsigned ClassID) const
TargetCostKind
The kind of cost model.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Basic
The cost of a typical 'add' instruction.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
unsigned getNumberOfRegisters(unsigned ClassID) const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
InstructionCost Cost
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).