LLVM  14.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the WebAssembly-specific TargetTransformInfo
11 /// implementation.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/CodeGen/CostTable.h"
17 #include "llvm/Support/Debug.h"
18 using namespace llvm;
19 
20 #define DEBUG_TYPE "wasmtti"
21 
23 WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
24  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
26 }
27 
/// Report the number of registers for the register class \p ClassID.
///
/// Starts from the value returned by BaseT::getNumberOfRegisters and, for the
/// class tested below (ClassID == 1), raises it to at least 16.
///
/// \param ClassID register class identifier being queried.
/// \returns the base implementation's count, clamped to a minimum of 16 when
///          ClassID == 1.
28 unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
29  unsigned Result = BaseT::getNumberOfRegisters(ClassID); // baseline from the base TTI implementation
30 
31  // For SIMD, use at least 16 registers, as a rough guess.
32  bool Vector = (ClassID == 1); // NOTE(review): ClassID 1 is taken to mean the vector class -- confirm against the TTI register-class numbering
33  if (Vector)
34  Result = std::max(Result, 16u); // only ever raises the count; a larger baseline is kept as-is
35 
36  return Result;
37 }
38 
41  switch (K) {
43  return TypeSize::getFixed(64);
45  return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
47  return TypeSize::getScalable(0);
48  }
49 
50  llvm_unreachable("Unsupported register kind");
51 }
52 
54  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
56  TTI::OperandValueProperties Opd1PropInfo,
58  const Instruction *CxtI) {
59 
60  InstructionCost Cost =
62  Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
63 
64  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
65  switch (Opcode) {
66  case Instruction::LShr:
67  case Instruction::AShr:
68  case Instruction::Shl:
69  // SIMD128's shifts currently only accept a scalar shift count. For each
70  // element, we'll need to extract, op, insert. The following is a rough
71  // approximation.
72  if (Opd2Info != TTI::OK_UniformValue &&
73  Opd2Info != TTI::OK_UniformConstantValue)
74  Cost =
75  cast<FixedVectorType>(VTy)->getNumElements() *
77  getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
79  break;
80  }
81  }
82  return Cost;
83 }
84 
86  Type *Val,
87  unsigned Index) {
88  InstructionCost Cost =
90 
91  // SIMD128's insert/extract currently only take constant indices.
92  if (Index == -1u)
93  return Cost + 25 * TargetTransformInfo::TCC_Expensive;
94 
95  return Cost;
96 }
97 
99  const Function *Callee) const {
100  // Allow inlining only when the Callee has a subset of the Caller's
101  // features. In principle, we should be able to inline regardless of any
102  // features because WebAssembly supports features at module granularity, not
103  // function granularity, but without this restriction it would be possible for
104  // a module to "forget" about features if all the functions that used them
105  // were inlined.
106  const TargetMachine &TM = getTLI()->getTargetMachine();
107 
108  const FeatureBitset &CallerBits =
109  TM.getSubtargetImpl(*Caller)->getFeatureBits();
110  const FeatureBitset &CalleeBits =
111  TM.getSubtargetImpl(*Callee)->getFeatureBits();
112 
113  return (CallerBits & CalleeBits) == CalleeBits;
114 }
115 
118  OptimizationRemarkEmitter *ORE) const {
119  // Scan the loop: don't unroll loops with calls. This is a standard approach
120  // for most (all?) targets.
121  for (BasicBlock *BB : L->blocks())
122  for (Instruction &I : *BB)
123  if (isa<CallInst>(I) || isa<InvokeInst>(I))
124  if (const Function *F = cast<CallBase>(I).getCalledFunction())
125  if (isLoweredToCall(F))
126  return;
127 
128  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
129  // the various microarchitectures that use the BasicTTI implementation and
130  // has been selected through heuristics across multiple cores and runtimes.
131  UP.Partial = UP.Runtime = UP.UpperBound = true;
132  UP.PartialThreshold = 30;
133 
134  // Avoid unrolling when optimizing for size.
135  UP.OptSizeThreshold = 0;
137 
138  // Set number of instructions optimized when "back edge"
139  // becomes "fall through" to default value of 2.
140  UP.BEInsns = 2;
141 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:592
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:478
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:457
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:485
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:453
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:880
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:592
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:907
llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:751
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:481
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
WebAssemblyTargetTransformInfo.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::TargetTransformInfoImplBase::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfoImpl.h:378
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1108
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::Instruction
Definition: Instruction.h:45
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:907
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:118
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::WebAssemblyTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: WebAssemblyTargetTransformInfo.cpp:85
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:881
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:886
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:118
llvm::WebAssemblyTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Definition: WebAssemblyTargetTransformInfo.cpp:116
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:878
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:339
CostTable.h
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:495
llvm::TypeSize
Definition: TypeSize.h:417
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:287
llvm::WebAssemblyTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const
Definition: WebAssemblyTargetTransformInfo.cpp:23
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:907
Vector
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::WebAssemblyTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: WebAssemblyTargetTransformInfo.cpp:39
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:907
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::WebAssemblyTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: WebAssemblyTargetTransformInfo.cpp:53
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:450
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::WebAssemblyTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: WebAssemblyTargetTransformInfo.cpp:28
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::WebAssemblyTTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: WebAssemblyTargetTransformInfo.cpp:98
Debug.h