LLVM 20.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the WebAssembly-specific TargetTransformInfo
11/// implementation.
12///
13//===----------------------------------------------------------------------===//
14
17#include "llvm/Support/Debug.h"
18using namespace llvm;
19
20#define DEBUG_TYPE "wasmtti"
21
23WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
24 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
26}
27
28unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
29 unsigned Result = BaseT::getNumberOfRegisters(ClassID);
30
31 // For SIMD, use at least 16 registers, as a rough guess.
32 bool Vector = (ClassID == 1);
33 if (Vector)
34 Result = std::max(Result, 16u);
35
36 return Result;
37}
38
41 switch (K) {
43 return TypeSize::getFixed(64);
45 return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
47 return TypeSize::getScalable(0);
48 }
49
50 llvm_unreachable("Unsupported register kind");
51}
52
54 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
57 const Instruction *CxtI) {
58
61 Opcode, Ty, CostKind, Op1Info, Op2Info);
62
63 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
64 switch (Opcode) {
65 case Instruction::LShr:
66 case Instruction::AShr:
67 case Instruction::Shl:
68 // SIMD128's shifts currently only accept a scalar shift count. For each
69 // element, we'll need to extract, op, insert. The following is a rough
70 // approximation.
71 if (!Op2Info.isUniform())
72 Cost =
73 cast<FixedVectorType>(VTy)->getNumElements() *
75 getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
77 break;
78 }
79 }
80 return Cost;
81}
82
86 unsigned Index, Value *Op0, Value *Op1) {
88 Opcode, Val, CostKind, Index, Op0, Op1);
89
90 // SIMD128's insert/extract currently only take constant indices.
91 if (Index == -1u)
93
94 return Cost;
95}
96
98 const IntrinsicInst *II) const {
99
100 switch (II->getIntrinsicID()) {
101 default:
102 break;
103 case Intrinsic::vector_reduce_fadd:
105 }
107}
108
110 const Function *Callee) const {
111 // Allow inlining only when the Callee has a subset of the Caller's
112 // features. In principle, we should be able to inline regardless of any
113 // features because WebAssembly supports features at module granularity, not
114 // function granularity, but without this restriction it would be possible for
115 // a module to "forget" about features if all the functions that used them
116 // were inlined.
117 const TargetMachine &TM = getTLI()->getTargetMachine();
118
119 const FeatureBitset &CallerBits =
120 TM.getSubtargetImpl(*Caller)->getFeatureBits();
121 const FeatureBitset &CalleeBits =
122 TM.getSubtargetImpl(*Callee)->getFeatureBits();
123
124 return (CallerBits & CalleeBits) == CalleeBits;
125}
126
129 OptimizationRemarkEmitter *ORE) const {
130 // Scan the loop: don't unroll loops with calls. This is a standard approach
131 // for most (all?) targets.
132 for (BasicBlock *BB : L->blocks())
133 for (Instruction &I : *BB)
134 if (isa<CallInst>(I) || isa<InvokeInst>(I))
135 if (const Function *F = cast<CallBase>(I).getCalledFunction())
136 if (isLoweredToCall(F))
137 return;
138
139 // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
140 // the various microarchitectures that use the BasicTTI implementation and
141 // has been selected through heuristics across multiple cores and runtimes.
142 UP.Partial = UP.Runtime = UP.UpperBound = true;
143 UP.PartialThreshold = 30;
144
145 // Avoid unrolling when optimizing for size.
146 UP.OptSizeThreshold = 0;
148
149 // Set number of instructions optimized when "back edge"
150 // becomes "fall through" to default value of 2.
151 UP.BEInsns = 2;
152}
153
155 return getST()->hasTailCall();
156}
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines a TargetTransformInfo::Concept conforming object specific to the WebAssembly target machine...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:897
Container class for subtarget features.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
The optimization diagnostic interface.
The main scalar evolution driver.
const TargetMachine & getTargetMachine() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isLoweredToCall(const Function *F) const
unsigned getNumberOfRegisters(unsigned ClassID) const
TargetCostKind
The kind of cost model.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Basic
The cost of a typical 'add' instruction.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
unsigned getNumberOfRegisters(unsigned ClassID) const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
InstructionCost Cost
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).