LLVM 22.0.0git
WebAssemblyTargetTransformInfo.cpp
//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"

#include "llvm/CodeGen/CostTable.h"
using namespace llvm;

#define DEBUG_TYPE "wasmtti"

TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return TargetTransformInfo::PSK_FastHardware;
}

unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) const {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
      if (!Op2Info.isUniform())
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}
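
// Example (illustrative): with SIMD128, a non-uniform vector shift such as
//
//   %r = shl <4 x i32> %v, %amts   ; %amts is not a splat
//
// cannot use i32x4.shl directly, because the instruction takes a single
// scalar shift count. The cost above therefore becomes
// 4 * (TCC_Basic + scalar shl cost + TCC_Basic), approximating an
// extract / op / insert sequence per lane.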

InstructionCost WebAssemblyTTIImpl::getCastInstrCost(
    unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH,
    TTI::TargetCostKind CostKind, const Instruction *I) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  auto SrcTy = TLI->getValueType(DL, Src);
  auto DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  if (!ST->hasSIMD128()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  auto DstVT = DstTy.getSimpleVT();
  auto SrcVT = SrcTy.getSimpleVT();

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode());

    // extmul_low support
    if (UserISD == ISD::MUL &&
        (ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND)) {
      // Free low extensions.
      if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) ||
          (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) {
        return 0;
      }
      // Will require an additional extlow operation for the intermediate
      // i16/i32 value.
      if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) {
        return 1;
      }
    }
  }

  static constexpr TypeConversionCostTblEntry ConversionTbl[] = {
      // extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      // 2 x extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      // extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      // 2x extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      // shuffle
      {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2},
      {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4},
      {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2},
      {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4},
      // narrow, and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2},
      {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2},
      // narrow, 2x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3},
      // 3x narrow, 4x and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7},
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7},
      // 7x narrow, 8x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15},
      // convert_i32x4
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      // extend_low, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      // extend_low x 2, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      // several shuffles
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      // trunc_sat, const, and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6},
      {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6},
      // trunc_sat, const, and, narrow
      {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4},
      {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4},
      // 2x trunc_sat, const, 2x and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8},
      {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8},
      // 2x trunc_sat, const, 2x and, narrow
      {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6},
      {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6},
  };

  if (const auto *Entry =
          ConvertCostTableLookup(ConversionTbl, ISD, DstVT, SrcVT)) {
    return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
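
// Example (illustrative): a sign-extend that feeds a widening multiply, e.g.
//
//   %a = sext <8 x i8> %x to <8 x i16>
//   %b = sext <8 x i8> %y to <8 x i16>
//   %p = mul <8 x i16> %a, %b
//
// can lower to i16x8.extmul_low_i8x16_s, so the extends are costed as free
// above. A standalone 'sext <8 x i8> ... to <8 x i16>' instead hits the
// ConversionTbl entry and is costed as a single extend_low instruction.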

TTI::MemCmpExpansionOptions
WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;

  Options.AllowOverlappingLoads = true;

  if (ST->hasSIMD128())
    Options.LoadSizes.push_back(16);

  Options.LoadSizes.append({8, 4, 2, 1});
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.NumLoadsPerBlock = Options.MaxNumLoads;

  return Options;
}
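
// Example (illustrative): with these options, a memcmp of 20 bytes on a
// SIMD128 target can be expanded into one 16-byte load pair plus one
// overlapping 16-byte load pair covering bytes 4..19, instead of a libcall;
// without SIMD128, the expansion falls back to 8/4/2/1-byte loads.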

InstructionCost WebAssemblyTTIImpl::getMemoryOpCost(
    unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
    TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
    const Instruction *I) const {
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(Ty)) {
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);
  }

  EVT VT = TLI->getValueType(DL, Ty, true);
  // Type legalization can't handle structs
  if (VT == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);

  auto LT = getTypeLegalizationCost(Ty);
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  unsigned width = VT.getSizeInBits();
  if (ISD == ISD::LOAD) {
    // 128-bit loads are a single instruction. 32-bit and 64-bit vector loads
    // can be lowered to load32_zero and load64_zero respectively. Assume SIMD
    // loads are twice as expensive as scalar.
    switch (width) {
    default:
      break;
    case 32:
    case 64:
    case 128:
      return 2;
    }
  } else if (ISD == ISD::STORE) {
    // For stores, we can use store lane operations.
    switch (width) {
    default:
      break;
    case 8:
    case 16:
    case 32:
    case 64:
    case 128:
      return 2;
    }
  }

  return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind);
}
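
// Example (illustrative): a load of <4 x i32> (128 bits) maps to a single
// v128.load and is costed at 2, i.e. twice the scalar load cost, while a
// <2 x i32> load (64 bits) is also costed at 2 because it can be lowered to
// v128.load64_zero. Widths not listed above fall back to the base
// implementation.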

InstructionCost WebAssemblyTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  auto *VecTy = cast<VectorType>(Ty);
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(VecTy)) {
    return InstructionCost::getInvalid();
  }

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, Ty, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  constexpr unsigned MaxInterleaveFactor = 4;
  if (Factor <= MaxInterleaveFactor) {
    unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
    // Ensure the number of vector elements is greater than 1.
    if (MinElts < 2 || MinElts % Factor != 0)
      return InstructionCost::getInvalid();

    unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
    // Ensure the element type is legal.
    if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
      return InstructionCost::getInvalid();

    auto *SubVecTy =
        VectorType::get(VecTy->getElementType(),
                        VecTy->getElementCount().divideCoefficientBy(Factor));
    InstructionCost MemCost =
        getMemoryOpCost(Opcode, SubVecTy, Alignment, AddressSpace, CostKind);

    unsigned VecSize = DL.getTypeSizeInBits(SubVecTy);
    unsigned MaxVecSize = 128;
    unsigned NumAccesses =
        std::max<unsigned>(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize);

    // A stride of two is commonly supported via dedicated instructions, so it
    // should be relatively cheap for all element sizes. A stride of four is
    // more expensive as it will likely require more shuffles. Using two
    // simd128 inputs is considered more expensive and we mainly account for
    // shuffling two inputs (32 bytes), but we do model 4 x v4i32 to enable
    // arithmetic kernels.
    static const CostTblEntry ShuffleCostTbl[] = {
        // One reg.
        {2, MVT::v2i8, 1},  // interleave 2 x 2i8 into 4i8
        {2, MVT::v4i8, 1},  // interleave 2 x 4i8 into 8i8
        {2, MVT::v8i8, 1},  // interleave 2 x 8i8 into 16i8
        {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16
        {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16
        {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32

        // Two regs.
        {2, MVT::v16i8, 2}, // interleave 2 x 16i8 into 32i8
        {2, MVT::v8i16, 2}, // interleave 2 x 8i16 into 16i16
        {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32

        // One reg.
        {4, MVT::v2i8, 4},  // interleave 4 x 2i8 into 8i8
        {4, MVT::v4i8, 4},  // interleave 4 x 4i8 into 16i8
        {4, MVT::v2i16, 4}, // interleave 4 x 2i16 into 8i16

        // Two regs.
        {4, MVT::v8i8, 16}, // interleave 4 x 8i8 into 32i8
        {4, MVT::v4i16, 8}, // interleave 4 x 4i16 into 16i16
        {4, MVT::v2i32, 4}, // interleave 4 x 2i32 into 8i32

        // Four regs.
        {4, MVT::v4i32, 16}, // interleave 4 x 4i32 into 16i32
    };

    EVT ETy = TLI->getValueType(DL, SubVecTy);
    if (const auto *Entry =
            CostTableLookup(ShuffleCostTbl, Factor, ETy.getSimpleVT()))
      return Entry->Cost + (NumAccesses * MemCost);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
}
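
// Example (illustrative): a Factor == 2 interleaved group over <8 x i16>
// (e.g. de-interleaving pairs of 16-bit channels) uses <4 x i16> sub-vector
// accesses; the table above charges 1 for the de/interleaving shuffle, so
// the group's total cost is that entry plus NumAccesses * MemCost.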

InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(
    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
    const Value *Op0, const Value *Op1) const {
  InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
      Opcode, Val, CostKind, Index, Op0, Op1);

  // SIMD128's insert/extract currently only take constant indices.
  if (Index == -1u)
    return Cost + 25 * TargetTransformInfo::TCC_Expensive;

  return Cost;
}
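
// Note (illustrative): callers pass Index == -1u to mean a non-constant lane
// index. i8x16.extract_lane and friends only encode immediate lane numbers,
// so a variable-index access typically has to go through a stack temporary,
// which the 25 * TCC_Expensive penalty above reflects.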

InstructionCost WebAssemblyTTIImpl::getPartialReductionCost(
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
    ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
    TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
    TTI::TargetCostKind CostKind) const {
  InstructionCost Invalid = InstructionCost::getInvalid();
  if (!VF.isFixed() || !ST->hasSIMD128())
    return Invalid;

  if (CostKind != TTI::TCK_RecipThroughput)
    return Invalid;

  if (Opcode != Instruction::Add)
    return Invalid;

  EVT AccumEVT = EVT::getEVT(AccumType);
  // TODO: Add i64 accumulator.
  if (AccumEVT != MVT::i32)
    return Invalid;

  // Possible options:
  // - i16x8.extadd_pairwise_i8x16_sx
  // - i32x4.extadd_pairwise_i16x8_sx
  // - i32x4.dot_i16x8_s
  // Only try to support dot, for now.

  EVT InputEVT = EVT::getEVT(InputTypeA);
  if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) ||
        (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) {
    return Invalid;
  }

  if (OpAExtend == TTI::PR_None)
    return Invalid;

  InstructionCost Cost(TTI::TCC_Basic);
  if (!BinOp)
    return Cost;

  if (OpAExtend != OpBExtend)
    return Invalid;

  if (*BinOp != Instruction::Mul)
    return Invalid;

  if (InputTypeA != InputTypeB)
    return Invalid;

  // Signed inputs can lower to dot
  if (InputEVT == MVT::i16 && VF.getFixedValue() == 8)
    return OpAExtend == TTI::PR_SignExtend ? Cost : Cost * 2;

  // Double the size of the lowered sequence.
  if (InputEVT == MVT::i8 && VF.getFixedValue() == 16)
    return OpAExtend == TTI::PR_SignExtend ? Cost * 2 : Cost * 4;

  return Invalid;
}
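
// Example (illustrative): a partial-reduction pattern such as
//
//   %a = sext <8 x i16> %x to <8 x i32>
//   %b = sext <8 x i16> %y to <8 x i32>
//   %m = mul <8 x i32> %a, %b
//   ; %m partially reduced into a <4 x i32> accumulator
//
// matches the signed i16, VF == 8 case above and is expected to lower to
// i32x4.dot_i16x8_s, so it gets the smallest cost; the unsigned and i8
// variants need longer extend/multiply/pairwise-add sequences, hence the
// 2x and 4x multipliers.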

TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
    const IntrinsicInst *II) const {

  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vector_reduce_fadd:
    return TTI::ReductionShuffle::Pairwise;
  }
  return TTI::ReductionShuffle::SplitHalf;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set number of instructions optimized when "back edge"
  // becomes "fall through" to default value of 2.
  UP.BEInsns = 2;
}

bool WebAssemblyTTIImpl::supportsTailCalls() const {
  return getST()->hasTailCall();
}

bool WebAssemblyTTIImpl::isProfitableToSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !I->isShift())
    return false;

  Value *V = I->getOperand(1);
  // We don't need to sink constant splat.
  if (isa<Constant>(V))
    return false;

  if (match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                         m_Value(), m_ZeroMask()))) {
    // Sink insert
    Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
    // Sink shuffle
    Ops.push_back(&I->getOperandUse(1));
    return true;
  }

  return false;
}
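
// Example (illustrative): for IR like
//
//   %ins = insertelement <4 x i32> poison, i32 %amt, i64 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
//                          <4 x i32> zeroinitializer
//   ...
//   %r = shl <4 x i32> %v, %splat   ; shift in a different block
//
// the insertelement and shufflevector are sunk next to the shift, so
// instruction selection sees the splat and can emit i32x4.shl with the
// scalar %amt instead of a per-lane shift sequence.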