LLVM 17.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the WebAssembly-specific TargetTransformInfo
11/// implementation.
12///
13//===----------------------------------------------------------------------===//
14
17#include "llvm/Support/Debug.h"
18using namespace llvm;
19
20#define DEBUG_TYPE "wasmtti"
21
23WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
24 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
26}
27
28unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
29 unsigned Result = BaseT::getNumberOfRegisters(ClassID);
30
31 // For SIMD, use at least 16 registers, as a rough guess.
32 bool Vector = (ClassID == 1);
33 if (Vector)
34 Result = std::max(Result, 16u);
35
36 return Result;
37}
38
41 switch (K) {
43 return TypeSize::getFixed(64);
45 return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
47 return TypeSize::getScalable(0);
48 }
49
50 llvm_unreachable("Unsupported register kind");
51}
52
54 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
57 const Instruction *CxtI) {
58
61 Opcode, Ty, CostKind, Op1Info, Op2Info);
62
63 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
64 switch (Opcode) {
65 case Instruction::LShr:
66 case Instruction::AShr:
67 case Instruction::Shl:
68 // SIMD128's shifts currently only accept a scalar shift count. For each
69 // element, we'll need to extract, op, insert. The following is a rough
70 // approximation.
71 if (!Op2Info.isUniform())
72 Cost =
73 cast<FixedVectorType>(VTy)->getNumElements() *
75 getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
77 break;
78 }
79 }
80 return Cost;
81}
82
86 unsigned Index, Value *Op0, Value *Op1) {
88 Opcode, Val, CostKind, Index, Op0, Op1);
89
90 // SIMD128's insert/extract currently only take constant indices.
91 if (Index == -1u)
93
94 return Cost;
95}
96
98 const Function *Callee) const {
99 // Allow inlining only when the Callee has a subset of the Caller's
100 // features. In principle, we should be able to inline regardless of any
101 // features because WebAssembly supports features at module granularity, not
102 // function granularity, but without this restriction it would be possible for
103 // a module to "forget" about features if all the functions that used them
104 // were inlined.
105 const TargetMachine &TM = getTLI()->getTargetMachine();
106
107 const FeatureBitset &CallerBits =
108 TM.getSubtargetImpl(*Caller)->getFeatureBits();
109 const FeatureBitset &CalleeBits =
110 TM.getSubtargetImpl(*Callee)->getFeatureBits();
111
112 return (CallerBits & CalleeBits) == CalleeBits;
113}
114
117 OptimizationRemarkEmitter *ORE) const {
118 // Scan the loop: don't unroll loops with calls. This is a standard approach
119 // for most (all?) targets.
120 for (BasicBlock *BB : L->blocks())
121 for (Instruction &I : *BB)
122 if (isa<CallInst>(I) || isa<InvokeInst>(I))
123 if (const Function *F = cast<CallBase>(I).getCalledFunction())
124 if (isLoweredToCall(F))
125 return;
126
127 // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
128 // the various microarchitectures that use the BasicTTI implementation and
129 // has been selected through heuristics across multiple cores and runtimes.
130 UP.Partial = UP.Runtime = UP.UpperBound = true;
131 UP.PartialThreshold = 30;
132
133 // Avoid unrolling when optimizing for size.
134 UP.OptSizeThreshold = 0;
136
137 // Set number of instructions optimized when "back edge"
138 // becomes "fall through" to default value of 2.
139 UP.BEInsns = 2;
140}
141
143 return getST()->hasTailCall();
144}
amdgpu Simplify well known AMD library false FunctionCallee Callee
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines a TargetTransformInfo::Concept conforming object specific to the WebAssembly target machine...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:849
Container class for subtarget features.
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
The optimization diagnostic interface.
The main scalar evolution driver.
const TargetMachine & getTargetMachine() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
bool isLoweredToCall(const Function *F) const
unsigned getNumberOfRegisters(unsigned ClassID) const
TargetCostKind
The kind of cost model.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Basic
The cost of a typical 'add' instruction.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:322
static constexpr TypeSize getScalable(ScalarTy MinimunSize)
Definition: TypeSize.h:325
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).