LLVM  15.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the WebAssembly-specific TargetTransformInfo
11 /// implementation.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/CodeGen/CostTable.h"
17 #include "llvm/Support/Debug.h"
18 using namespace llvm;
19 
20 #define DEBUG_TYPE "wasmtti"
21 
23 WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
24  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
26 }
27 
28 unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
29  unsigned Result = BaseT::getNumberOfRegisters(ClassID);
30 
31  // For SIMD, use at least 16 registers, as a rough guess.
32  bool Vector = (ClassID == 1);
33  if (Vector)
34  Result = std::max(Result, 16u);
35 
36  return Result;
37 }
38 
41  switch (K) {
43  return TypeSize::getFixed(64);
45  return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
47  return TypeSize::getScalable(0);
48  }
49 
50  llvm_unreachable("Unsupported register kind");
51 }
52 
54  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
56  TTI::OperandValueProperties Opd1PropInfo,
58  const Instruction *CxtI) {
59 
60  InstructionCost Cost =
62  Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
63 
64  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
65  switch (Opcode) {
66  case Instruction::LShr:
67  case Instruction::AShr:
68  case Instruction::Shl:
69  // SIMD128's shifts currently only accept a scalar shift count. For each
70  // element, we'll need to extract, op, insert. The following is a rough
71  // approxmation.
72  if (Opd2Info != TTI::OK_UniformValue &&
73  Opd2Info != TTI::OK_UniformConstantValue)
74  Cost =
75  cast<FixedVectorType>(VTy)->getNumElements() *
77  getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
79  break;
80  }
81  }
82  return Cost;
83 }
84 
86  Type *Val,
87  unsigned Index) {
88  InstructionCost Cost =
89  BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
90 
91  // SIMD128's insert/extract currently only take constant indices.
92  if (Index == -1u)
93  return Cost + 25 * TargetTransformInfo::TCC_Expensive;
94 
95  return Cost;
96 }
97 
99  const Function *Callee) const {
100  // Allow inlining only when the Callee has a subset of the Caller's
101  // features. In principle, we should be able to inline regardless of any
102  // features because WebAssembly supports features at module granularity, not
103  // function granularity, but without this restriction it would be possible for
104  // a module to "forget" about features if all the functions that used them
105  // were inlined.
106  const TargetMachine &TM = getTLI()->getTargetMachine();
107 
108  const FeatureBitset &CallerBits =
109  TM.getSubtargetImpl(*Caller)->getFeatureBits();
110  const FeatureBitset &CalleeBits =
111  TM.getSubtargetImpl(*Callee)->getFeatureBits();
112 
113  return (CallerBits & CalleeBits) == CalleeBits;
114 }
115 
118  OptimizationRemarkEmitter *ORE) const {
119  // Scan the loop: don't unroll loops with calls. This is a standard approach
120  // for most (all?) targets.
121  for (BasicBlock *BB : L->blocks())
122  for (Instruction &I : *BB)
123  if (isa<CallInst>(I) || isa<InvokeInst>(I))
124  if (const Function *F = cast<CallBase>(I).getCalledFunction())
125  if (isLoweredToCall(F))
126  return;
127 
128  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
129  // the various microarchitectures that use the BasicTTI implementation and
130  // has been selected through heuristics across multiple cores and runtimes.
131  UP.Partial = UP.Runtime = UP.UpperBound = true;
132  UP.PartialThreshold = 30;
133 
134  // Avoid unrolling when optimizing for size.
135  UP.OptSizeThreshold = 0;
137 
138  // Set number of instructions optimized when "back edge"
139  // becomes "fall through" to default value of 2.
140  UP.BEInsns = 2;
141 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:594
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:480
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:459
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:487
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:210
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:263
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:455
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:892
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:594
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:919
llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:776
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:483
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
WebAssemblyTargetTransformInfo.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::TargetTransformInfoImplBase::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfoImpl.h:394
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1138
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::Instruction
Definition: Instruction.h:42
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:919
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::WebAssemblyTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: WebAssemblyTargetTransformInfo.cpp:85
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:893
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:430
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:160
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:898
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:121
llvm::WebAssemblyTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Definition: WebAssemblyTargetTransformInfo.cpp:116
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:890
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:347
CostTable.h
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:497
llvm::TypeSize
Definition: TypeSize.h:421
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::WebAssemblyTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const
Definition: WebAssemblyTargetTransformInfo.cpp:23
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:919
llvm::WebAssemblyTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: WebAssemblyTargetTransformInfo.cpp:39
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:919
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::WebAssemblyTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: WebAssemblyTargetTransformInfo.cpp:53
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:452
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:262
llvm::WebAssemblyTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: WebAssemblyTargetTransformInfo.cpp:28
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::WebAssemblyTTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: WebAssemblyTargetTransformInfo.cpp:98
Debug.h